diff --git a/CMakeLists.txt b/CMakeLists.txt index 752dbe79b..be401781b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,17 +38,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Options # set(TRITON_VERSION "0.0.0" CACHE STRING "Version for the clients") -set(PERF_ANALYZER_VERSION ${TRITON_VERSION} CACHE STRING "Build Version for Perf Analyzer") option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF) option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF) option(TRITON_ENABLE_PYTHON_HTTP "Enable Python HTTP client libraries" OFF) option(TRITON_ENABLE_PYTHON_GRPC "Enable Python GRPC client libraries" OFF) option(TRITON_ENABLE_JAVA_HTTP "Enable JAVA HTTP client libraries" OFF) -option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF) -option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF) -option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF) -option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF) -option(TRITON_ENABLE_PERF_ANALYZER_OPENAI "Enable OpenAI support for Performance Analyzer" OFF) option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF) option(TRITON_ENABLE_TESTS "Include tests in build" OFF) option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF) @@ -131,28 +125,15 @@ else() set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf") endif() -if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_PERF_ANALYZER_C_API) +if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC) set(_cc_client_depends re2) if(${TRITON_ENABLE_CC_HTTP}) set(_cc_client_depends ${_cc_client_depends} curl) endif() # TRITON_ENABLE_CC_HTTP - if(${TRITON_ENABLE_CC_GRPC} OR ${TRITON_ENABLE_PERF_ANALYZER}) + if(${TRITON_ENABLE_CC_GRPC}) set(_cc_client_depends ${_cc_client_depends} grpc protobuf) - endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER - - if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_C_API}) - message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_C_API=ON requires TRITON_ENABLE_PERF_ANALYZER=ON") - endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_C_API - if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TFS}) - message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TFS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON") - endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TFS - if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS}) - message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON") - endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS - if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_OPENAI}) - message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON requires TRITON_ENABLE_PERF_ANALYZER=ON") - endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_OPENAI + endif() # TRITON_ENABLE_CC_GRPC ExternalProject_Add(cc-clients PREFIX cc-clients @@ -172,14 +153,8 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} - -DPERF_ANALYZER_VERSION:STRING=${PERF_ANALYZER_VERSION} -DTRITON_ENABLE_CC_HTTP:BOOL=${TRITON_ENABLE_CC_HTTP} 
-DTRITON_ENABLE_CC_GRPC:BOOL=${TRITON_ENABLE_CC_GRPC} - -DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER} - -DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API} - -DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS} - -DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS} - -DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI} -DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES} -DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS} -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU} @@ -189,16 +164,13 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER -DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX} DEPENDS ${_cc_client_depends} ) -endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER +endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC) set(_py_client_depends re2) if(${TRITON_ENABLE_PYTHON_GRPC}) set(_py_client_depends ${_py_client_depends} grpc protobuf) endif() # TRITON_ENABLE_PYTHON_GRPC - if(${TRITON_ENABLE_PERF_ANALYZER}) - set(_py_client_depends ${_py_client_depends} cc-clients) - endif() # TRITON_ENABLE_PERF_ANALYZER ExternalProject_Add(python-clients PREFIX python-clients @@ -219,11 +191,6 @@ if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC) -DTRITON_VERSION:STRING=${TRITON_VERSION} -DTRITON_ENABLE_PYTHON_HTTP:BOOL=${TRITON_ENABLE_PYTHON_HTTP} -DTRITON_ENABLE_PYTHON_GRPC:BOOL=${TRITON_ENABLE_PYTHON_GRPC} - -DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER} - -DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API} - -DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS} - -DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS} - -DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI} -DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES} -DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS} -DTRITON_PACKAGE_PERF_ANALYZER:BOOL=${TRITON_PACKAGE_PERF_ANALYZER} diff --git a/README.md b/README.md index ea710bbb7..bfe36fb77 100644 --- a/README.md +++ b/README.md @@ -146,24 +146,9 @@ The components of the install packages are: * grpc [ `service_pb2`, `service_pb2_grpc`, `model_config_pb2` ] * utils [ linux distribution will include `shared_memory` and `cuda_shared_memory`] -The Linux version of the package also includes the -[perf_analyzer](src/c++/perf_analyzer/README.md) -binary. The perf_analyzer binary is built on Ubuntu 20.04 and may not -run on other Linux distributions. To run the perf_analyzer the -following dependency must be installed: - -```bash -$ sudo apt update -$ sudo apt install libb64-dev -``` - -To reiterate, the installation on windows will not include perf_analyzer -nor shared_memory/cuda_shared_memory components. - ### Download From GitHub -The client libraries and the perf_analyzer executable can be -downloaded from the [Triton GitHub release +The client libraries can be downloaded from the [Triton GitHub release page](https://github.com/triton-inference-server/server/releases) corresponding to the release you are interested in. The client libraries are found in the "Assets" section of the release page in a @@ -186,15 +171,6 @@ include/, the Python wheel files in python/, and the jar files in java/. The bin/ and python/ directories contain the built examples that you can learn more about below. 
-The perf_analyzer binary is built on Ubuntu 20.04 and may not run on -other Linux distributions. To use the C++ libraries or perf_analyzer -executable you must install some dependencies. - -```bash -$ apt-get update -$ apt-get install curl libcurl4-openssl-dev libb64-dev -``` - ### Download Docker Image From NGC A Docker image containing the client libraries and examples is @@ -254,17 +230,6 @@ because Triton on Windows does not yet support all the build options. Use *cmake* to configure the build. You should adjust the flags depending on the components of Triton Client you are working and would like to build. -For example, if you want to build Perf Analyzer with Triton C API, you can use \ -`-DTRITON_ENABLE_PERF_ANALYZER=ON -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON`. You can -also use `TRITON_ENABLE_PERF_ANALYZER_TFS` and `TRITON_ENABLE_PERF_ANALYZER_TS` flags -to enable/disable support for TensorFlow Serving and TorchServe backend respectively in perf analyzer. \ -The following command demonstrate how to build client with all the features: - -``` -$ mkdir build -$ cd build -$ cmake -DCMAKE_INSTALL_PREFIX=`pwd`/install -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON -DTRITON_ENABLE_PERF_ANALYZER=ON -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON -DTRITON_ENABLE_PERF_ANALYZER_TS=ON -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON -DTRITON_ENABLE_JAVA_HTTP=ON -DTRITON_ENABLE_GPU=ON -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON .. -``` If you are building on a release branch (or on a development branch that is based off of a release branch), then you must also use diff --git a/src/c++/CMakeLists.txt b/src/c++/CMakeLists.txt index a54253172..71c433850 100644 --- a/src/c++/CMakeLists.txt +++ b/src/c++/CMakeLists.txt @@ -39,7 +39,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF) option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF) -option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF) option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF) option(TRITON_ENABLE_TESTS "Include tests in build" OFF) option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF) @@ -71,26 +70,16 @@ FetchContent_Declare( URL https://github.com/google/googletest/archive/9406a60c7839052e4944ea4dbc8344762a89f9bd.zip ) -if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_GRPC) set(TRITON_COMMON_ENABLE_PROTOBUF ON) set(TRITON_COMMON_ENABLE_GRPC ON) +endif() # TRITON_ENABLE_CC_GRPC - if(TRITON_ENABLE_PERF_ANALYZER) - FetchContent_Declare( - repo-core - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git - GIT_TAG ${TRITON_CORE_REPO_TAG} - GIT_SHALLOW ON - ) - FetchContent_MakeAvailable(repo-core) - endif() # TRITON_ENABLE_PERF_ANALYZER -endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER - -if(NOT TRITON_ENABLE_PERF_ANALYZER AND NOT TRITON_ENABLE_CC_HTTP AND NOT TRITON_ENABLE_EXAMPLES) +if(NOT TRITON_ENABLE_CC_HTTP AND NOT TRITON_ENABLE_EXAMPLES) set(TRITON_COMMON_ENABLE_JSON OFF) endif() -if(TRITON_ENABLE_TESTS OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_TESTS) FetchContent_MakeAvailable(googletest) endif() FetchContent_MakeAvailable(repo-common) @@ -111,33 +100,33 @@ endif() # TRITON_ENABLE_GPU # # libcurl # -if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_HTTP) find_package(CURL REQUIRED) message(STATUS "Using curl ${CURL_VERSION}") -endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER 
+endif() # TRITON_ENABLE_CC_HTTP # # Protobuf # -if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_GRPC) set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE) find_package(Protobuf CONFIG REQUIRED) message(STATUS "Using protobuf ${Protobuf_VERSION}") include_directories(${Protobuf_INCLUDE_DIRS}) -endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER +endif() # TRITON_ENABLE_CC_GRPC # # GRPC # -if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_GRPC) find_package(gRPC CONFIG REQUIRED) message(STATUS "Using gRPC ${gRPC_VERSION}") include_directories($) -endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER +endif() # TRITON_ENABLE_CC_GRPC -if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC) add_subdirectory(library) -endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER +endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC) if(TRITON_ENABLE_EXAMPLES) @@ -148,7 +137,3 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC) add_subdirectory(tests) endif() # TRITON_ENABLE_TESTS endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC - -if(TRITON_ENABLE_PERF_ANALYZER) - add_subdirectory(perf_analyzer) -endif() # TRITON_ENABLE_PERF_ANALYZER diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt index 7a62971e5..a0ed94374 100644 --- a/src/c++/library/CMakeLists.txt +++ b/src/c++/library/CMakeLists.txt @@ -45,7 +45,7 @@ target_include_directories( # # json_utils # -if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_EXAMPLES) +if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_EXAMPLES) find_package(RapidJSON CONFIG REQUIRED) add_library( json-utils-library EXCLUDE_FROM_ALL OBJECT @@ -111,7 +111,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_EXAMPLE RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) -endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_EXAMPLES +endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_EXAMPLES # # shm_utils @@ -176,7 +176,7 @@ install( RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) -if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_GRPC) # # libgrpcclient.so and libgrpcclient_static.a # @@ -350,9 +350,9 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) ${CMAKE_CURRENT_SOURCE_DIR}/grpc_client.h DESTINATION include ) -endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER +endif() # TRITON_ENABLE_CC_GRPC -if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_HTTP) if(${TRITON_ENABLE_ZLIB}) find_package(ZLIB REQUIRED) endif() # TRITON_ENABLE_ZLIB @@ -494,9 +494,9 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) ${CMAKE_CURRENT_SOURCE_DIR}/http_client.h DESTINATION include ) -endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER +endif() # TRITON_ENABLE_CC_HTTP -if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) +if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC) install( FILES ${CMAKE_CURRENT_SOURCE_DIR}/common.h @@ -508,7 +508,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER include(GNUInstallDirs) set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonClient) - if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) + if(TRITON_ENABLE_CC_GRPC) install( TARGETS 
grpcclient @@ -519,9 +519,9 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) - endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER + endif() # TRITON_ENABLE_CC_GRPC - if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) + if(TRITON_ENABLE_CC_HTTP) install( TARGETS httpclient @@ -532,7 +532,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) - endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER + endif() # TRITON_ENABLE_CC_HTTP install( EXPORT @@ -573,4 +573,4 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER export(PACKAGE TritonClient) -endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER +endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC diff --git a/src/c++/perf_analyzer/CMakeLists.txt b/src/c++/perf_analyzer/CMakeLists.txt deleted file mode 100644 index b81795e38..000000000 --- a/src/c++/perf_analyzer/CMakeLists.txt +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -cmake_minimum_required (VERSION 3.18) - -if(WIN32) - message("perf_analyzer is not currently supported on Windows because " - "is requires functionalities that are UNIX specific.") -else() - -add_subdirectory(client_backend) - -find_package(Git REQUIRED) - -execute_process(WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - COMMAND "${GIT_EXECUTABLE}" log -n 1 --abbrev-commit --format=format:%h - RESULT_VARIABLE RETURN_CODE - OUTPUT_VARIABLE GIT_SHA) -if(NOT RETURN_CODE EQUAL "0") - set(GIT_SHA "unknown") -endif() - -set( - PERF_ANALYZER_SRCS - command_line_parser.cc - perf_analyzer.cc - model_parser.cc - perf_utils.cc - load_manager.cc - data_loader.cc - concurrency_manager.cc - request_rate_manager.cc - load_worker.cc - concurrency_worker.cc - request_rate_worker.cc - custom_load_manager.cc - infer_context.cc - inference_profiler.cc - report_writer.cc - mpi_utils.cc - metrics_manager.cc - infer_data_manager_base.cc - infer_data_manager.cc - infer_data_manager_shm.cc - sequence_manager.cc - profile_data_collector.cc - profile_data_exporter.cc - periodic_concurrency_manager.cc - periodic_concurrency_worker.cc -) - -set( - PERF_ANALYZER_HDRS - command_line_parser.h - perf_analyzer.h - model_parser.h - perf_utils.h - load_manager.h - data_loader.h - concurrency_manager.h - request_rate_manager.h - custom_load_manager.h - iworker.h - load_worker.h - request_rate_worker.h - concurrency_worker.h - infer_context.h - inference_profiler.h - report_writer.h - mpi_utils.h - doctest.h - constants.h - metrics.h - metrics_manager.h - infer_data_manager_factory.h - iinfer_data_manager.h - infer_data_manager.h - infer_data_manager_shm.h - infer_data_manager_base.h - infer_data.h - sequence_manager.h - sequence_status.h - ictx_id_tracker.h - concurrency_ctx_id_tracker.h - fifo_ctx_id_tracker.h - rand_ctx_id_tracker.h - request_record.h - profile_data_collector.h - profile_data_exporter.h - periodic_concurrency_manager.h - periodic_concurrency_worker.h - thread_config.h -) - -add_executable( - perf_analyzer - main.cc - ${PERF_ANALYZER_SRCS} - ${PERF_ANALYZER_HDRS} - $ -) -target_link_libraries( - perf_analyzer - PRIVATE - client-backend-library - -lb64 - ${CMAKE_DL_LIBS} -) - -target_compile_definitions( - perf_analyzer - PRIVATE - PERF_ANALYZER_VERSION=${PERF_ANALYZER_VERSION} - GIT_SHA=${GIT_SHA} -) - -# If gpu is enabled then compile with CUDA dependencies -if(TRITON_ENABLE_GPU) - target_compile_definitions( - perf_analyzer - PUBLIC TRITON_ENABLE_GPU=1 - ) - - target_link_libraries( - perf_analyzer - PRIVATE CUDA::cudart - ) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_C_API) - target_compile_definitions( - client-backend-library - PUBLIC TRITON_ENABLE_PERF_ANALYZER_C_API=1 - ) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_TFS) - target_compile_definitions( - client-backend-library - PUBLIC TRITON_ENABLE_PERF_ANALYZER_TFS=1 - ) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_TS) - target_compile_definitions( - client-backend-library - PUBLIC TRITON_ENABLE_PERF_ANALYZER_TS=1 - ) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_OPENAI) - target_compile_definitions( - client-backend-library - PUBLIC TRITON_ENABLE_PERF_ANALYZER_OPENAI=1 - ) -endif() - -install( - TARGETS perf_analyzer - RUNTIME DESTINATION bin -) - -target_compile_definitions(perf_analyzer PUBLIC DOCTEST_CONFIG_DISABLE) - -# Creating perf_client link to perf_analyzer binary for backwards compatibility. 
-install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ./perf_analyzer perf_client - WORKING_DIRECTORY ${CMAKE_INSTALL_PREFIX}/bin/)") -install(CODE "message(\"-- Created symlink: perf_client -> ./perf_analyzer\")") - - - -set(PERF_ANALYZER_UNIT_TESTS_SRCS ${PERF_ANALYZER_SRCS}) -list(PREPEND PERF_ANALYZER_UNIT_TESTS_SRCS perf_analyzer_unit_tests.cc) -set(PERF_ANALYZER_UNIT_TESTS_HDRS ${PERF_ANALYZER_HDRS}) - -add_executable( - perf_analyzer_unit_tests - ${PERF_ANALYZER_UNIT_TESTS_SRCS} - ${PERF_ANALYZER_UNIT_TESTS_HDRS} - mock_inference_profiler.h - mock_model_parser.h - test_utils.h - client_backend/mock_client_backend.h - mock_concurrency_worker.h - mock_data_loader.h - mock_infer_context.h - mock_infer_data_manager.h - mock_request_rate_worker.h - mock_sequence_manager.h - mock_profile_data_collector.h - mock_profile_data_exporter.h - test_dataloader.cc - test_inference_profiler.cc - test_command_line_parser.cc - test_idle_timer.cc - test_load_manager_base.h - test_load_manager.cc - test_model_parser.cc - test_metrics_manager.cc - test_perf_utils.cc - test_report_writer.cc - client_backend/triton/test_triton_client_backend.cc - test_request_rate_manager.cc - test_concurrency_manager.cc - test_custom_load_manager.cc - test_sequence_manager.cc - test_infer_context.cc - test_ctx_id_tracker.cc - test_profile_data_collector.cc - test_profile_data_exporter.cc - $ -) - -# -Wno-write-strings is needed for the unit tests in order to statically create -# input argv cases in the CommandLineParser unit test -# -set_target_properties(perf_analyzer_unit_tests - PROPERTIES COMPILE_FLAGS "-Wno-write-strings") - -target_link_libraries( - perf_analyzer_unit_tests - PRIVATE - gmock - client-backend-library - -lb64 -) - -target_include_directories( - perf_analyzer_unit_tests - PRIVATE - client_backend -) - -install( - TARGETS perf_analyzer_unit_tests - RUNTIME DESTINATION bin -) - -endif() diff --git a/src/c++/perf_analyzer/README.md b/src/c++/perf_analyzer/README.md index e910f4663..1686f99f5 100644 --- a/src/c++/perf_analyzer/README.md +++ b/src/c++/perf_analyzer/README.md @@ -1,171 +1,30 @@ -# Triton Performance Analyzer - -Triton Performance Analyzer is CLI tool which can help you optimize the -inference performance of models running on Triton Inference Server by measuring -changes in performance as you experiment with different optimization strategies. - -
-
-# Features
-
-### Inference Load Modes
-
-- [Concurrency Mode](docs/inference_load_modes.md#concurrency-mode) simlulates
-  load by maintaining a specific concurrency of outgoing requests to the
-  server
-
-- [Request Rate Mode](docs/inference_load_modes.md#request-rate-mode) simulates
-  load by sending consecutive requests at a specific rate to the server
-
-- [Custom Interval Mode](docs/inference_load_modes.md#custom-interval-mode)
-  simulates load by sending consecutive requests at specific intervals to the
-  server
-
-### Performance Measurement Modes
-
-- [Time Windows Mode](docs/measurements_metrics.md#time-windows) measures model
-  performance repeatedly over a specific time interval until performance has
-  stabilized
-
-- [Count Windows Mode](docs/measurements_metrics.md#count-windows) measures
-  model performance repeatedly over a specific number of requests until
-  performance has stabilized
-
-### Other Features
-
-- [Sequence Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/architecture.md#stateful-models),
-  [Ensemble Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/architecture.md#ensemble-models),
-  and
-  [Decoupled Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md)
-  can be profiled in addition to standard/stateless/coupled models
-
-- [Input Data](docs/input_data.md) to model inferences can be auto-generated or
-  specified as well as verifying output
-
-- [TensorFlow Serving](docs/benchmarking.md#benchmarking-tensorflow-serving) and
-  [TorchServe](docs/benchmarking.md#benchmarking-torchserve) can be used as the
-  inference server in addition to the default Triton server
-
-
-
-# Quick Start
-
-The steps below will guide you on how to start using Perf Analyzer.
-
-### Step 1: Start Triton Container
-
-```bash
-export RELEASE= # e.g. to use the release from the end of February of 2023, do `export RELEASE=23.02`
-
-docker pull nvcr.io/nvidia/tritonserver:${RELEASE}-py3
-
-docker run --gpus all --rm -it --net host nvcr.io/nvidia/tritonserver:${RELEASE}-py3
-```
-
-### Step 2: Download `simple` Model
-
-```bash
-# inside triton container
-git clone --depth 1 https://github.com/triton-inference-server/server
-
-mkdir model_repository ; cp -r server/docs/examples/model_repository/simple model_repository
-```
-
-### Step 3: Start Triton Server
-
-```bash
-# inside triton container
-tritonserver --model-repository $(pwd)/model_repository &> server.log &
-
-# confirm server is ready, look for 'HTTP/1.1 200 OK'
-curl -v localhost:8000/v2/health/ready
-
-# detach (CTRL-p CTRL-q)
-```
-
-### Step 4: Start Triton SDK Container
-
-```bash
-docker pull nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk
-
-docker run --gpus all --rm -it --net host nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk
-```
-
-### Step 5: Run Perf Analyzer
-
-```bash
-# inside sdk container
-perf_analyzer -m simple
-```
-
-See the full [quick start guide](docs/quick_start.md) for additional tips on
-how to analyze output.
-
-
-
-# Documentation
-
-- [Installation](docs/install.md)
-- [Perf Analyzer CLI](docs/cli.md)
-- [Inference Load Modes](docs/inference_load_modes.md)
-- [Input Data](docs/input_data.md)
-- [Measurements & Metrics](docs/measurements_metrics.md)
-- [Benchmarking](docs/benchmarking.md)
-
-
-
-# Contributing
-
-Contributions to Triton Perf Analyzer are more than welcome. To contribute
-please review the [contribution
-guidelines](https://github.com/triton-inference-server/server/blob/main/CONTRIBUTING.md),
-then fork and create a pull request.
-
- -# Reporting problems, asking questions - -We appreciate any feedback, questions or bug reporting regarding this -project. When help with code is needed, follow the process outlined in -the Stack Overflow (https://stackoverflow.com/help/mcve) -document. Ensure posted examples are: - -- minimal - use as little code as possible that still produces the - same problem - -- complete - provide all parts needed to reproduce the problem. Check - if you can strip external dependency and still show the problem. The - less time we spend on reproducing problems the more time we have to - fix it - -- verifiable - test the code you're about to provide to make sure it - reproduces the problem. Remove all other problems that are not - related to your request/question. +Perf Analyzer documentation has been relocated to +[here](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md). diff --git a/src/c++/perf_analyzer/base_queue_ctx_id_tracker.h b/src/c++/perf_analyzer/base_queue_ctx_id_tracker.h deleted file mode 100644 index ba0f17813..000000000 --- a/src/c++/perf_analyzer/base_queue_ctx_id_tracker.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include - -#include "ictx_id_tracker.h" - -namespace triton { namespace perfanalyzer { - -// Base class for CtxIdTrackers that track available IDs via a queue -// -class BaseQueueCtxIdTracker : public ICtxIdTracker { - public: - BaseQueueCtxIdTracker() = default; - - void Restore(size_t id) override { free_ctx_ids_.push(id); } - - size_t Get() override - { - if (!IsAvailable()) { - throw std::runtime_error("free ctx id list is empty"); - } - - size_t ctx_id = free_ctx_ids_.front(); - free_ctx_ids_.pop(); - return ctx_id; - } - - bool IsAvailable() override { return free_ctx_ids_.size() > 0; } - - protected: - std::queue free_ctx_ids_; - - // Erase all entries in the tracking queue - // - void Clear() - { - std::queue empty; - std::swap(free_ctx_ids_, empty); - } -}; - -}}; // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/client_backend/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/CMakeLists.txt deleted file mode 100644 index 2c780ee22..000000000 --- a/src/c++/perf_analyzer/client_backend/CMakeLists.txt +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -cmake_minimum_required (VERSION 3.18) - -# fixme -add_definitions(-DCURL_STATICLIB) - -add_subdirectory(triton) - -if(TRITON_ENABLE_PERF_ANALYZER_C_API) - add_subdirectory(triton_c_api) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_TFS) - add_subdirectory(tensorflow_serving) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_TS) - add_subdirectory(torchserve) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_OPENAI) - add_subdirectory(openai) -endif() - -set( - CLIENT_BACKEND_SRCS - client_backend.cc -) - -set( - CLIENT_BACKEND_HDRS - client_backend.h -) - -if(TRITON_ENABLE_PERF_ANALYZER_C_API) - set(CAPI_LIBRARY $) - set(CAPI_TARGET_LINK_LIBRARY PUBLIC $) - set(CAPI_TARGET_INCLUDE_DIRECTORY PRIVATE $) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_TFS) - set(TFS_LIBRARY $) - set(TFS_TARGET_LINK_LIBRARY PUBLIC $) - set(TFS_TARGET_INCLUDE_DIRECTORY PRIVATE $) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_TS) - set(TS_LIBRARY $) - set(TS_TARGET_LINK_LIBRARY PUBLIC $) - set(TS_TARGET_INCLUDE_DIRECTORY PRIVATE $) -endif() - -if(TRITON_ENABLE_PERF_ANALYZER_OPENAI) - set(OPENAI_LIBRARY $) - set(OPENAI_TARGET_LINK_LIBRARY PUBLIC $) - set(OPENAI_TARGET_INCLUDE_DIRECTORY PRIVATE $) -endif() - -add_library( - client-backend-library - ${CLIENT_BACKEND_SRCS} - ${CLIENT_BACKEND_HDRS} - $ - $ - ${CAPI_LIBRARY} - ${TFS_LIBRARY} - ${TS_LIBRARY} - ${OPENAI_LIBRARY} -) - -target_link_libraries( - client-backend-library - PUBLIC triton-common-json # from repo-common - PUBLIC $ - ${CAPI_TARGET_LINK_LIBRARY} - ${TFS_TARGET_LINK_LIBRARY} - ${TS_TARGET_LINK_LIBRARY} - ${OPENAI_TARGET_LINK_LIBRARY} -) - -target_include_directories( - client-backend-library - PRIVATE $ - ${CAPI_TARGET_INCLUDE_DIRECTORY} - ${TFS_TARGET_INCLUDE_DIRECTORY} - ${TS_TARGET_INCLUDE_DIRECTORY} - ${OPENAI_TARGET_INCLUDE_DIRECTORY} -) diff --git a/src/c++/perf_analyzer/client_backend/client_backend.cc b/src/c++/perf_analyzer/client_backend/client_backend.cc deleted file mode 100644 index 09af5e5e5..000000000 --- a/src/c++/perf_analyzer/client_backend/client_backend.cc +++ /dev/null @@ -1,582 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "client_backend.h" - -#include "triton/triton_client_backend.h" - -#ifdef TRITON_ENABLE_PERF_ANALYZER_C_API -#include "triton_c_api/triton_c_api_backend.h" -#endif // TRITON_ENABLE_PERF_ANALYZER_C_API - -#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI -#include "openai/openai_client_backend.h" -#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI - -#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS -#include "tensorflow_serving/tfserve_client_backend.h" -#endif // TRITON_ENABLE_PERF_ANALYZER_TFS - -#ifdef TRITON_ENABLE_PERF_ANALYZER_TS -#include "torchserve/torchserve_client_backend.h" -#endif // TRITON_ENABLE_PERF_ANALYZER_TS - -namespace triton { namespace perfanalyzer { namespace clientbackend { - -//================================================ - -const Error Error::Success("", pa::SUCCESS); -const Error Error::Failure("", pa::GENERIC_ERROR); - -Error::Error() : msg_(""), error_(pa::SUCCESS) {} - -Error::Error(const std::string& msg, const uint32_t err) - : msg_(msg), error_(err) -{ -} - -Error::Error(const std::string& msg) : msg_(msg) -{ - error_ = pa::GENERIC_ERROR; -} - -std::ostream& -operator<<(std::ostream& out, const Error& err) -{ - if (!err.msg_.empty()) { - out << err.msg_ << std::endl; - } - return out; -} - -//================================================ - -std::string -BackendKindToString(const BackendKind kind) -{ - switch (kind) { - case TRITON: - return std::string("TRITON"); - break; - case TENSORFLOW_SERVING: - return std::string("TENSORFLOW_SERVING"); - break; - case TORCHSERVE: - return std::string("TORCHSERVE"); - break; - case TRITON_C_API: - return std::string("TRITON_C_API"); - break; - case OPENAI: - return std::string("OPENAI"); - break; - default: - return std::string("UNKNOWN"); - break; - } -} - -grpc_compression_algorithm -BackendToGrpcType(const GrpcCompressionAlgorithm compression_algorithm) -{ - switch (compression_algorithm) { - case COMPRESS_DEFLATE: - return grpc_compression_algorithm::GRPC_COMPRESS_DEFLATE; - case COMPRESS_GZIP: - return grpc_compression_algorithm::GRPC_COMPRESS_GZIP; - default: - return grpc_compression_algorithm::GRPC_COMPRESS_NONE; - } -} - -//================================================ - -// -// ClientBackendFactory -// -Error -ClientBackendFactory::Create( - const BackendKind kind, const std::string& url, const std::string& endpoint, - const ProtocolType protocol, const SslOptionsBase& ssl_options, - const std::map> trace_options, - const GrpcCompressionAlgorithm compression_algorithm, - std::shared_ptr http_headers, - const std::string& triton_server_path, - const std::string& model_repository_path, const bool verbose, - const std::string& metrics_url, const cb::TensorFormat input_tensor_format, - const cb::TensorFormat output_tensor_format, - std::shared_ptr* factory) -{ - factory->reset(new ClientBackendFactory( - kind, url, endpoint, protocol, ssl_options, trace_options, - compression_algorithm, http_headers, triton_server_path, - model_repository_path, verbose, metrics_url, 
input_tensor_format, - output_tensor_format)); - return Error::Success; -} - -Error -ClientBackendFactory::CreateClientBackend( - std::unique_ptr* client_backend) -{ - RETURN_IF_CB_ERROR(ClientBackend::Create( - kind_, url_, endpoint_, protocol_, ssl_options_, trace_options_, - compression_algorithm_, http_headers_, verbose_, triton_server_path, - model_repository_path_, metrics_url_, input_tensor_format_, - output_tensor_format_, client_backend)); - return Error::Success; -} - -const BackendKind& -ClientBackendFactory::Kind() -{ - return kind_; -} - -// -// ClientBackend -// -Error -ClientBackend::Create( - const BackendKind kind, const std::string& url, const std::string& endpoint, - const ProtocolType protocol, const SslOptionsBase& ssl_options, - const std::map> trace_options, - const GrpcCompressionAlgorithm compression_algorithm, - std::shared_ptr http_headers, const bool verbose, - const std::string& triton_server_path, - const std::string& model_repository_path, const std::string& metrics_url, - const TensorFormat input_tensor_format, - const TensorFormat output_tensor_format, - std::unique_ptr* client_backend) -{ - std::unique_ptr local_backend; - if (kind == TRITON) { - RETURN_IF_CB_ERROR(tritonremote::TritonClientBackend::Create( - url, protocol, ssl_options, trace_options, - BackendToGrpcType(compression_algorithm), http_headers, verbose, - metrics_url, input_tensor_format, output_tensor_format, - &local_backend)); - } -#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI - else if (kind == OPENAI) { - RETURN_IF_CB_ERROR(openai::OpenAiClientBackend::Create( - url, endpoint, protocol, http_headers, verbose, &local_backend)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI -#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS - else if (kind == TENSORFLOW_SERVING) { - RETURN_IF_CB_ERROR(tfserving::TFServeClientBackend::Create( - url, protocol, BackendToGrpcType(compression_algorithm), http_headers, - verbose, &local_backend)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_TFS -#ifdef TRITON_ENABLE_PERF_ANALYZER_TS - else if (kind == TORCHSERVE) { - RETURN_IF_CB_ERROR(torchserve::TorchServeClientBackend::Create( - url, protocol, http_headers, verbose, &local_backend)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_TS -#ifdef TRITON_ENABLE_PERF_ANALYZER_C_API - else if (kind == TRITON_C_API) { - RETURN_IF_CB_ERROR(tritoncapi::TritonCApiClientBackend::Create( - triton_server_path, model_repository_path, verbose, &local_backend)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_C_API - else { - return Error("unsupported client backend requested", pa::GENERIC_ERROR); - } - - *client_backend = std::move(local_backend); - - return Error::Success; -} - -Error -ClientBackend::ServerExtensions(std::set* server_extensions) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support ServerExtensions API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support ModelMetadata API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support ModelConfig API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::Infer( - InferResult** result, const InferOptions& options, - const 
std::vector& inputs, - const std::vector& outputs) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support Infer API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support AsyncInfer API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::StartStream(OnCompleteFn callback, bool enable_stats) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support StartStream API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::AsyncStreamInfer( - const InferOptions& options, const std::vector& inputs, - const std::vector& outputs) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support AsyncStreamInfer API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::ClientInferStat(InferStat* infer_stat) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support ClientInferStat API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::ModelInferenceStatistics( - std::map* model_stats, - const std::string& model_name, const std::string& model_version) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support ModelInferenceStatistics API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::Metrics(triton::perfanalyzer::Metrics& metrics) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support Metrics API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::UnregisterAllSharedMemory() -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support UnregisterAllSharedMemory API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::RegisterSystemSharedMemory( - const std::string& name, const std::string& key, const size_t byte_size) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support RegisterSystemSharedMemory API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::RegisterCudaSharedMemory( - const std::string& name, const cudaIpcMemHandle_t& handle, - const size_t byte_size) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support RegisterCudaSharedMemory API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::RegisterCudaMemory( - const std::string& name, void* handle, const size_t byte_size) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support RegisterCudaMemory API", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::RegisterSystemMemory( - const std::string& name, void* memory_ptr, const size_t byte_size) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support RegisterCudaMemory API", - pa::GENERIC_ERROR); -} - -// -// Shared Memory Utilities -// -Error -ClientBackend::CreateSharedMemoryRegion( - std::string shm_key, size_t byte_size, int* shm_fd) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support CreateSharedMemoryRegion()", - pa::GENERIC_ERROR); -} - - -Error -ClientBackend::MapSharedMemory( - int shm_fd, size_t offset, size_t byte_size, void** shm_addr) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support MapSharedMemory()", - pa::GENERIC_ERROR); -} - 
- -Error -ClientBackend::CloseSharedMemory(int shm_fd) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support CloseSharedMemory()", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::UnlinkSharedMemoryRegion(std::string shm_key) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support UnlinkSharedMemoryRegion()", - pa::GENERIC_ERROR); -} - -Error -ClientBackend::UnmapSharedMemory(void* shm_addr, size_t byte_size) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support UnmapSharedMemory()", - pa::GENERIC_ERROR); -} - - -ClientBackend::ClientBackend(const BackendKind kind) : kind_(kind) {} - -// -// InferInput -// -Error -InferInput::Create( - InferInput** infer_input, const BackendKind kind, const std::string& name, - const std::vector& dims, const std::string& datatype) -{ - if (kind == TRITON) { - RETURN_IF_CB_ERROR(tritonremote::TritonInferInput::Create( - infer_input, name, dims, datatype)); - } -#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI - else if (kind == OPENAI) { - RETURN_IF_CB_ERROR( - openai::OpenAiInferInput::Create(infer_input, name, dims, datatype)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI -#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS - else if (kind == TENSORFLOW_SERVING) { - RETURN_IF_CB_ERROR(tfserving::TFServeInferInput::Create( - infer_input, name, dims, datatype)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_TFS -#ifdef TRITON_ENABLE_PERF_ANALYZER_TS - else if (kind == TORCHSERVE) { - RETURN_IF_CB_ERROR(torchserve::TorchServeInferInput::Create( - infer_input, name, dims, datatype)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_TS -#ifdef TRITON_ENABLE_PERF_ANALYZER_C_API - else if (kind == TRITON_C_API) { - RETURN_IF_CB_ERROR(tritoncapi::TritonCApiInferInput::Create( - infer_input, name, dims, datatype)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_C_API - else { - return Error( - "unsupported client backend provided to create InferInput object", - pa::GENERIC_ERROR); - } - - return Error::Success; -} - -Error -InferInput::SetShape(const std::vector& shape) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support SetShape() for InferInput", - pa::GENERIC_ERROR); -} - -Error -InferInput::Reset() -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support Reset() for InferInput", - pa::GENERIC_ERROR); -} - -Error -InferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support AppendRaw() for InferInput", - pa::GENERIC_ERROR); -} - -Error -InferInput::SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support SetSharedMemory() for InferInput", - pa::GENERIC_ERROR); -} - -Error -InferInput::RawData(const uint8_t** buf, size_t* byte_size) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support RawData() for InferInput", - pa::GENERIC_ERROR); -} - -InferInput::InferInput( - const BackendKind kind, const std::string& name, - const std::string& datatype) - : kind_(kind), name_(name), datatype_(datatype) -{ -} - -// -// InferRequestedOutput -// -Error -InferRequestedOutput::Create( - InferRequestedOutput** infer_output, const BackendKind kind, - const std::string& name, const std::string& datatype, - const size_t 
class_count) -{ - if (kind == TRITON) { - RETURN_IF_CB_ERROR(tritonremote::TritonInferRequestedOutput::Create( - infer_output, name, class_count, datatype)); - } -#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI - else if (kind == OPENAI) { - RETURN_IF_CB_ERROR(openai::OpenAiInferRequestedOutput::Create( - infer_output, name, datatype)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI -#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS - else if (kind == TENSORFLOW_SERVING) { - RETURN_IF_CB_ERROR( - tfserving::TFServeInferRequestedOutput::Create(infer_output, name)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_TFS -#ifdef TRITON_ENABLE_PERF_ANALYZER_C_API - else if (kind == TRITON_C_API) { - RETURN_IF_CB_ERROR(tritoncapi::TritonCApiInferRequestedOutput::Create( - infer_output, name, class_count, datatype)); - } -#endif // TRITON_ENABLE_PERF_ANALYZER_C_API - else { - return Error( - "unsupported client backend provided to create InferRequestedOutput " - "object", - pa::GENERIC_ERROR); - } - - return Error::Success; -} - -Error -InferRequestedOutput::SetSharedMemory( - const std::string& region_name, size_t byte_size, size_t offset) -{ - return Error( - "client backend of kind " + BackendKindToString(kind_) + - " does not support SetSharedMemory() for InferRequestedOutput", - pa::GENERIC_ERROR); -} - -InferRequestedOutput::InferRequestedOutput( - const BackendKind kind, const std::string& name, - const std::string& datatype) - : kind_(kind), name_(name), datatype_(datatype) -{ -} - -}}} // namespace triton::perfanalyzer::clientbackend diff --git a/src/c++/perf_analyzer/client_backend/client_backend.h b/src/c++/perf_analyzer/client_backend/client_backend.h deleted file mode 100644 index 06f68c2e3..000000000 --- a/src/c++/perf_analyzer/client_backend/client_backend.h +++ /dev/null @@ -1,675 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "../constants.h" -#include "../metrics.h" -#include "../perf_analyzer_exception.h" -#include "ipc.h" - -namespace pa = triton::perfanalyzer; - -namespace triton { namespace perfanalyzer { namespace clientbackend { - -#define RETURN_IF_CB_ERROR(S) \ - do { \ - const triton::perfanalyzer::clientbackend::Error& status__ = (S); \ - if (!status__.IsOk()) { \ - return status__; \ - } \ - } while (false) - -#define RETURN_IF_ERROR(S) \ - do { \ - triton::perfanalyzer::clientbackend::Error status__ = (S); \ - if (!status__.IsOk()) { \ - return status__; \ - } \ - } while (false) - -#define FAIL_IF_ERR(X, MSG) \ - { \ - triton::perfanalyzer::clientbackend::Error err = (X); \ - if (!err.IsOk()) { \ - std::cerr << "error: " << (MSG) << ": " << err << std::endl; \ - exit(err.Err()); \ - } \ - } \ - while (false) - -#define THROW_IF_ERROR(S, MSG) \ - do { \ - triton::perfanalyzer::clientbackend::Error status__ = (S); \ - if (!status__.IsOk()) { \ - std::cerr << "error: " << (MSG) << ": " << status__ << std::endl; \ - throw PerfAnalyzerException(GENERIC_ERROR); \ - } \ - } while (false) - -//============================================================================== -/// Error status reported by backends -/// -class Error { - public: - /// Create an error - explicit Error(); - - /// Create an error with the specified message and error code. - /// \param msg The message for the error - /// \param err The error code for the error - explicit Error(const std::string& msg, const uint32_t err); - - /// Create an error with the specified message. - /// \param msg The message for the error - explicit Error(const std::string& msg); - - /// Accessor for the message of this error. - /// \return The message for the error. Empty if no error. - const std::string& Message() const { return msg_; } - - /// Accessor for the error code. - /// \return The error code for the error. 0 if no error. - const uint32_t Err() const { return error_; } - - /// Does this error indicate OK status? - /// \return True if this error indicates "ok"/"success", false if - /// error indicates a failure. - bool IsOk() const { return error_ == 0; } - - /// Convenience "success" value. Can be used as Error::Success to - /// indicate no error. - static const Error Success; - - /// Convenience "failure" value. Can be used as Error::Failure to - /// indicate a generic error. - static const Error Failure; - - private: - friend std::ostream& operator<<(std::ostream&, const Error&); - std::string msg_{""}; - uint32_t error_{pa::SUCCESS}; -}; - -//=================================================================================== - -class ClientBackend; -class InferInput; -class InferRequestedOutput; -class InferResult; - -enum BackendKind { - TRITON = 0, - TENSORFLOW_SERVING = 1, - TORCHSERVE = 2, - TRITON_C_API = 3, - OPENAI = 4 -}; -std::string BackendKindToString(const BackendKind kind); - -enum ProtocolType { HTTP = 0, GRPC = 1, UNKNOWN = 2 }; -enum GrpcCompressionAlgorithm { - COMPRESS_NONE = 0, - COMPRESS_DEFLATE = 1, - COMPRESS_GZIP = 2 -}; -enum class TensorFormat { BINARY, JSON, UNKNOWN }; -typedef std::map Headers; - -using OnCompleteFn = std::function; -using ModelIdentifier = std::pair; - -struct InferStat { - /// Total number of requests completed. - size_t completed_request_count; - - /// Time from the request start until the response is completely - /// received. 
- uint64_t cumulative_total_request_time_ns; - - /// Time from the request start until the last byte is sent. - uint64_t cumulative_send_time_ns; - - /// Time from receiving first byte of the response until the - /// response is completely received. - uint64_t cumulative_receive_time_ns; - - /// Create a new InferStat object with zero-ed statistics. - InferStat() - : completed_request_count(0), cumulative_total_request_time_ns(0), - cumulative_send_time_ns(0), cumulative_receive_time_ns(0) - { - } -}; - -// Per model statistics -struct ModelStatistics { - uint64_t success_count_; - uint64_t inference_count_; - uint64_t execution_count_; - uint64_t queue_count_; - uint64_t compute_input_count_; - uint64_t compute_infer_count_; - uint64_t compute_output_count_; - uint64_t cache_hit_count_; - uint64_t cache_miss_count_; - uint64_t cumm_time_ns_; - uint64_t queue_time_ns_; - uint64_t compute_input_time_ns_; - uint64_t compute_infer_time_ns_; - uint64_t compute_output_time_ns_; - uint64_t cache_hit_time_ns_; - uint64_t cache_miss_time_ns_; -}; - -/// -/// Structure to hold Request parameter data for Inference Request. -/// -struct RequestParameter { - std::string name; - std::string value; - std::string type; -}; - -//============================================================================== -/// Structure to hold options for Inference Request. -/// -struct InferOptions { - explicit InferOptions(const std::string& model_name) - : model_name_(model_name), model_version_(""), request_id_(""), - sequence_id_(0), sequence_id_str_(""), sequence_start_(false), - sequence_end_(false), triton_enable_empty_final_response_(true) - { - } - /// The name of the model to run inference. - std::string model_name_; - /// The version of the model. - std::string model_version_; - /// The model signature name for TF models. - std::string model_signature_name_; - /// An identifier for the request. - std::string request_id_; - /// The unique identifier for the sequence being represented by the - /// object. Default value is 0 which means that the request does not - /// belong to a sequence. If this value is set, then sequence_id_str_ - /// MUST be set to "". - uint64_t sequence_id_; - /// The unique identifier for the sequence being represented by the - /// object. Default value is "" which means that the request does not - /// belong to a sequence. If this value is set, then sequence_id_ MUST - /// be set to 0. - std::string sequence_id_str_; - /// Indicates whether the request being added marks the start of the - /// sequence. Default value is False. This argument is ignored if - /// 'sequence_id' is 0. - bool sequence_start_; - /// Indicates whether the request being added marks the end of the - /// sequence. Default value is False. This argument is ignored if - /// 'sequence_id' is 0. - bool sequence_end_; - /// Whether to tell Triton to enable an empty final response. 
-  bool triton_enable_empty_final_response_;
-
-  /// Additional parameters to pass to the model
-  std::unordered_map<std::string, RequestParameter> request_parameters_;
-};
-
-struct SslOptionsBase {
-  bool ssl_grpc_use_ssl = false;
-  std::string ssl_grpc_root_certifications_file = "";
-  std::string ssl_grpc_private_key_file = "";
-  std::string ssl_grpc_certificate_chain_file = "";
-  long ssl_https_verify_peer = 1L;
-  long ssl_https_verify_host = 2L;
-  std::string ssl_https_ca_certificates_file = "";
-  std::string ssl_https_client_certificate_file = "";
-  std::string ssl_https_client_certificate_type = "";
-  std::string ssl_https_private_key_file = "";
-  std::string ssl_https_private_key_type = "";
-};
-
-//
-// The object factory to create client backends to communicate with the
-// inference service
-//
-class ClientBackendFactory {
- public:
-  /// Create a factory that can be used to construct Client Backends.
-  /// \param kind The kind of client backend to create.
-  /// \param url The inference server url and port.
-  /// \param endpoint The endpoint on the inference server to send requests to
-  /// \param protocol The protocol type used.
-  /// \param ssl_options The SSL options used with client backend.
-  /// \param compression_algorithm The compression algorithm to be used
-  /// on the grpc requests.
-  /// \param http_headers Map of HTTP headers. The map key/value
-  /// indicates the header name/value. The headers will be included
-  /// with all the requests made to server using this client.
-  /// \param triton_server_path Only for C api backend. Library path to
-  /// the top-level Triton directory (which is typically
-  /// /opt/tritonserver). Must contain libtritonserver.so.
-  /// \param model_repository_path Only for C api backend. Path to model
-  /// repository which contains the desired model.
-  /// \param verbose Enables the verbose mode.
-  /// \param metrics_url The inference server metrics url and port.
-  /// \param input_tensor_format The Triton inference request input tensor
-  /// format.
-  /// \param output_tensor_format The Triton inference response output tensor
-  /// format.
-  /// \param factory Returns a new ClientBackendFactory object.
-  /// \return Error object indicating success or failure.
-  static Error Create(
-      const BackendKind kind, const std::string& url,
-      const std::string& endpoint, const ProtocolType protocol,
-      const SslOptionsBase& ssl_options,
-      const std::map<std::string, std::vector<std::string>> trace_options,
-      const GrpcCompressionAlgorithm compression_algorithm,
-      std::shared_ptr<Headers> http_headers,
-      const std::string& triton_server_path,
-      const std::string& model_repository_path, const bool verbose,
-      const std::string& metrics_url, const TensorFormat input_tensor_format,
-      const TensorFormat output_tensor_format,
-      std::shared_ptr<ClientBackendFactory>* factory);
-
-  const BackendKind& Kind();
-
-  /// Create a ClientBackend.
-  /// \param backend Returns a new ClientBackend object.
- virtual Error CreateClientBackend(std::unique_ptr* backend); - - private: - ClientBackendFactory( - const BackendKind kind, const std::string& url, - const std::string& endpoint, const ProtocolType protocol, - const SslOptionsBase& ssl_options, - const std::map> trace_options, - const GrpcCompressionAlgorithm compression_algorithm, - const std::shared_ptr http_headers, - const std::string& triton_server_path, - const std::string& model_repository_path, const bool verbose, - const std::string& metrics_url, const TensorFormat input_tensor_format, - const TensorFormat output_tensor_format) - : kind_(kind), url_(url), endpoint_(endpoint), protocol_(protocol), - ssl_options_(ssl_options), trace_options_(trace_options), - compression_algorithm_(compression_algorithm), - http_headers_(http_headers), triton_server_path(triton_server_path), - model_repository_path_(model_repository_path), verbose_(verbose), - metrics_url_(metrics_url), input_tensor_format_(input_tensor_format), - output_tensor_format_(output_tensor_format) - { - } - - const BackendKind kind_; - const std::string url_; - const std::string endpoint_; - const ProtocolType protocol_; - const SslOptionsBase& ssl_options_; - const std::map> trace_options_; - const GrpcCompressionAlgorithm compression_algorithm_; - std::shared_ptr http_headers_; - std::string triton_server_path; - std::string model_repository_path_; - const bool verbose_; - const std::string metrics_url_{""}; - const TensorFormat input_tensor_format_{TensorFormat::UNKNOWN}; - const TensorFormat output_tensor_format_{TensorFormat::UNKNOWN}; - - -#ifndef DOCTEST_CONFIG_DISABLE - protected: - ClientBackendFactory() - : kind_(BackendKind()), url_(""), protocol_(ProtocolType()), - ssl_options_(SslOptionsBase()), - trace_options_(std::map>()), - compression_algorithm_(GrpcCompressionAlgorithm()), verbose_(false) - { - } -#endif -}; - -// -// Interface for interacting with an inference service -// -class ClientBackend { - public: - static Error Create( - const BackendKind kind, const std::string& url, - const std::string& endpoint, const ProtocolType protocol, - const SslOptionsBase& ssl_options, - const std::map> trace_options, - const GrpcCompressionAlgorithm compression_algorithm, - std::shared_ptr http_headers, const bool verbose, - const std::string& library_directory, const std::string& model_repository, - const std::string& metrics_url, const TensorFormat input_tensor_format, - const TensorFormat output_tensor_format, - std::unique_ptr* client_backend); - - /// Destructor for the client backend object - virtual ~ClientBackend() = default; - - /// Get the backend kind - BackendKind Kind() const { return kind_; } - - /// Get the server metadata from the server - virtual Error ServerExtensions(std::set* server_extensions); - - /// Get the model metadata from the server for specified name and - /// version as rapidjson DOM object. - virtual Error ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version); - - /// Get the model config from the server for specified name and - /// version as rapidjson DOM object. - virtual Error ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version); - - /// Issues a synchronous inference request to the server. - virtual Error Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs); - - /// Issues an asynchronous inference request to the server. 
-  virtual Error AsyncInfer(
-      OnCompleteFn callback, const InferOptions& options,
-      const std::vector<InferInput*>& inputs,
-      const std::vector<const InferRequestedOutput*>& outputs);
-
-  /// Establishes a stream to the server.
-  virtual Error StartStream(OnCompleteFn callback, bool enable_stats);
-
-  /// Issues an asynchronous inference request to the underlying stream.
-  virtual Error AsyncStreamInfer(
-      const InferOptions& options, const std::vector<InferInput*>& inputs,
-      const std::vector<const InferRequestedOutput*>& outputs);
-
-  /// Gets the client side inference statistics from the client library.
-  virtual Error ClientInferStat(InferStat* infer_stat);
-
-  /// Gets the server-side model inference statistics from the server.
-  virtual Error ModelInferenceStatistics(
-      std::map<ModelIdentifier, ModelStatistics>* model_stats,
-      const std::string& model_name = "",
-      const std::string& model_version = "");
-
-  /// Gets the server-side metrics from the server.
-  /// \param metrics Output metrics object.
-  /// \return Error object indicating success or failure.
-  virtual Error Metrics(Metrics& metrics);
-
-  /// Unregisters all the shared memory from the server
-  virtual Error UnregisterAllSharedMemory();
-
-  /// Registers a system shared memory to the server
-  virtual Error RegisterSystemSharedMemory(
-      const std::string& name, const std::string& key, const size_t byte_size);
-
-  /// Registers cuda shared memory to the server.
-  virtual Error RegisterCudaSharedMemory(
-      const std::string& name, const cudaIpcMemHandle_t& handle,
-      const size_t byte_size);
-
-  /// Registers cuda memory to the server.
-  virtual Error RegisterCudaMemory(
-      const std::string& name, void* handle, const size_t byte_size);
-
-  /// Registers a system memory location on the server.
-  virtual Error RegisterSystemMemory(
-      const std::string& name, void* memory_ptr, const size_t byte_size);
-
-  //
-  // Shared Memory Utilities
-  //
-  // FIXME: These should probably move to a common area with shm_utils not
-  // tied specifically to inferenceserver. Create a shared memory region of
-  // the size 'byte_size' and return the unique identifier.
-  virtual Error CreateSharedMemoryRegion(
-      std::string shm_key, size_t byte_size, int* shm_fd);
-
-  // Mmap the shared memory region with the given 'offset' and 'byte_size' and
-  // return the base address of the region.
-  // \param shm_fd The int descriptor of the created shared memory region
-  // \param offset The offset of the shared memory block from the start of the
-  // shared memory region
-  // \param byte_size The size in bytes of the shared memory region
-  // \param shm_addr Returns the base address of the shared memory region
-  // \return error Returns an error if unable to mmap shared memory region.
-  virtual Error MapSharedMemory(
-      int shm_fd, size_t offset, size_t byte_size, void** shm_addr);
-
-  // Close the shared memory descriptor.
-  // \param shm_fd The int descriptor of the created shared memory region
-  // \return error Returns an error if unable to close shared memory descriptor.
-  virtual Error CloseSharedMemory(int shm_fd);
-
-  // Destroy the shared memory region with the given name.
-  // \return error Returns an error if unable to unlink shared memory region.
-  virtual Error UnlinkSharedMemoryRegion(std::string shm_key);
-
-  // Munmap the shared memory region from the base address with the given
-  // byte_size.
-  // \return error Returns an error if unable to unmap shared memory region.
-  virtual Error UnmapSharedMemory(void* shm_addr, size_t byte_size);
-
- protected:
-  /// Constructor for client backend
-  ClientBackend(const BackendKind kind);
-  // The kind of the backend.
-  const BackendKind kind_{TRITON};
-
-#ifndef DOCTEST_CONFIG_DISABLE
- public:
-  ClientBackend() = default;
-#endif
-};
-
-
-//
-// Interface for preparing the inputs for inference to the backend
-//
-class InferInput {
- public:
-  /// Create an InferInput instance that describes a model input.
-  /// \param infer_input Returns a new InferInput object.
-  /// \param kind The kind of the associated client backend.
-  /// \param name The name of input whose data will be described by this object.
-  /// \param dims The shape of the input.
-  /// \param datatype The datatype of the input.
-  /// \return Error object indicating success or failure.
-  static Error Create(
-      InferInput** infer_input, const BackendKind kind, const std::string& name,
-      const std::vector<int64_t>& dims, const std::string& datatype);
-
-  virtual ~InferInput() = default;
-
-  /// Gets name of the associated input tensor.
-  /// \return The name of the tensor.
-  const std::string& Name() const { return name_; }
-
-  /// Gets datatype of the associated input tensor.
-  /// \return The datatype of the tensor.
-  const std::string& Datatype() const { return datatype_; }
-
-  /// Gets the shape of the input tensor.
-  /// \return The shape of the tensor.
-  virtual const std::vector<int64_t>& Shape() const = 0;
-
-  /// Set the shape of input associated with this object.
-  /// \param dims the vector of dims representing the new shape
-  /// of input.
-  /// \return Error object indicating success or failure of the
-  /// request.
-  virtual Error SetShape(const std::vector<int64_t>& dims);
-
-  /// Prepare this input to receive new tensor values. Forget any
-  /// existing values that were set by previous calls to SetSharedMemory()
-  /// or AppendRaw().
-  /// \return Error object indicating success or failure.
-  virtual Error Reset();
-
-  /// Append tensor values for this input from a byte array.
-  /// \param input The pointer to the array holding the tensor value.
-  /// \param input_byte_size The size of the array in bytes.
-  /// \return Error object indicating success or failure.
-  virtual Error AppendRaw(const uint8_t* input, size_t input_byte_size);
-
-  /// Set tensor values for this input by reference into a shared memory
-  /// region.
-  /// \param name The user-given name for the registered shared memory region
-  /// where the tensor values for this input are stored.
-  /// \param byte_size The size, in bytes, of the input tensor data. Must
-  /// match the size expected for the input shape.
-  /// \param offset The offset into the shared memory region up to the start
-  /// of the input tensor values. The default value is 0.
-  /// \return Error object indicating success or failure
-  virtual Error SetSharedMemory(
-      const std::string& name, size_t byte_size, size_t offset = 0);
-
-  /// Get access to the buffer holding raw input. Note the buffer is owned by
-  /// InferInput instance. Users can copy out the data if required to extend
-  /// the lifetime.
-  /// \param buf Returns the pointer to the start of the buffer.
-  /// \param byte_size Returns the size of buffer in bytes.
-  /// \return Error object indicating success or failure of the
-  /// request.
-  virtual Error RawData(const uint8_t** buf, size_t* byte_size);
-
- protected:
-  InferInput(
-      const BackendKind kind, const std::string& name,
-      const std::string& datatype_);
-
-  const BackendKind kind_;
-  const std::string name_;
-  const std::string datatype_;
-};
-
-
-//
-// Interface for specifying the model outputs requested for an inference
-//
-class InferRequestedOutput {
- public:
-  virtual ~InferRequestedOutput() = default;
-
-  /// Create an InferRequestedOutput instance that describes a model output
-  /// being requested.
-  /// \param infer_output Returns a new InferRequestedOutput object.
-  /// \param kind The kind of the associated client backend.
-  /// \param name The name of output being requested.
-  /// \param datatype The datatype of the output.
-  /// \param class_count The number of classifications to be requested. The
-  /// default value is 0 which means the classification results are not
-  /// requested.
-  /// \return Error object indicating success or failure.
-  static Error Create(
-      InferRequestedOutput** infer_output, const BackendKind kind,
-      const std::string& name, const std::string& datatype,
-      const size_t class_count = 0);
-
-  /// Gets name of the associated output tensor.
-  /// \return The name of the tensor.
-  const std::string& Name() const { return name_; }
-
-  /// Gets datatype of the associated output tensor.
-  /// \return The datatype of the tensor.
-  const std::string& Datatype() const { return datatype_; }
-
-  /// Set the output tensor data to be written to specified shared
-  /// memory region.
-  /// \param region_name The name of the shared memory region.
-  /// \param byte_size The size of data in bytes.
-  /// \param offset The offset in shared memory region. Default value is 0.
-  /// \return Error object indicating success or failure of the
-  /// request.
-  virtual Error SetSharedMemory(
-      const std::string& region_name, const size_t byte_size,
-      const size_t offset = 0);
-
- protected:
-  InferRequestedOutput(
-      const BackendKind kind, const std::string& name,
-      const std::string& datatype = "");
-  const BackendKind kind_;
-  const std::string name_;
-  const std::string datatype_;
-};
-
-//
-// Interface for accessing the processed results.
-//
-class InferResult {
- public:
-  virtual ~InferResult() = default;
-
-  /// Get the id of the request which generated this response.
-  /// \param id Returns the request id that generated the result.
-  /// \return Error object indicating success or failure.
-  virtual Error Id(std::string* id) const = 0;
-
-
-  /// Returns the status of the request.
-  /// \return Error object indicating the success or failure of the
-  /// request.
-  virtual Error RequestStatus() const = 0;
-
-  /// Returns the raw data of the output.
-  /// \return Error object indicating the success or failure of the
-  /// request.
-  virtual Error RawData(
-      const std::string& output_name, const uint8_t** buf,
-      size_t* byte_size) const = 0;
-
-  /// Get final response bool for this response.
-  /// \return Error object indicating the success or failure.
-  virtual Error IsFinalResponse(bool* is_final_response) const
-  {
-    return Error("InferResult::IsFinalResponse() not implemented");
-  };
-
-  /// Get null response bool for this response.
-  /// \return Error object indicating the success or failure.
- virtual Error IsNullResponse(bool* is_null_response) const - { - return Error("InferResult::IsNullResponse() not implemented"); - }; -}; - -}}} // namespace triton::perfanalyzer::clientbackend - -namespace cb = triton::perfanalyzer::clientbackend; diff --git a/src/c++/perf_analyzer/client_backend/mock_client_backend.h b/src/c++/perf_analyzer/client_backend/mock_client_backend.h deleted file mode 100644 index 483af914d..000000000 --- a/src/c++/perf_analyzer/client_backend/mock_client_backend.h +++ /dev/null @@ -1,660 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
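Reviewer note: the client_backend.h removed above centers on the `Error` status type and the `RETURN_IF_CB_ERROR` macro, which every backend call returns through. A minimal caller sketch, assuming only the declarations shown above; the helper `LoadAndInfer` and its arguments are hypothetical and only illustrate the propagation idiom:

```cpp
#include <string>
#include <vector>

#include "client_backend.h"

// Hypothetical helper: runs one synchronous inference and propagates any
// failure to the caller as a cb::Error.
cb::Error
LoadAndInfer(
    cb::ClientBackend* backend, const cb::InferOptions& options,
    const std::vector<cb::InferInput*>& inputs)
{
  // Each interface call returns a cb::Error; RETURN_IF_CB_ERROR checks
  // IsOk() and early-returns the failing status.
  cb::InferResult* result = nullptr;
  RETURN_IF_CB_ERROR(backend->Infer(&result, options, inputs, {}));

  std::string request_id;
  RETURN_IF_CB_ERROR(result->Id(&request_id));

  delete result;  // assumes the caller owns the returned InferResult
  return cb::Error::Success;
}
```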
- -#pragma once - -#include -#include -#include -#include - -#include "../doctest.h" -#include "client_backend.h" -#include "gmock/gmock.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { - -// Holds information (either the raw data or a shared memory label) for an -// inference input -// -struct TestRecordedInput { - TestRecordedInput(int32_t data_in, size_t size_in) - : shared_memory_label(""), data(data_in), size(size_in) - { - } - - TestRecordedInput(std::string label_in, size_t size_in) - : shared_memory_label(label_in), data(0), size(size_in) - { - } - - std::string shared_memory_label; - int32_t data; - size_t size; -}; - -/// Mock class of an InferInput -/// -class MockInferInput : public InferInput { - public: - MockInferInput( - const BackendKind kind, const std::string& name, - const std::vector& dims, const std::string& datatype) - : InferInput(kind, name, datatype), dims_(dims) - { - } - - const std::vector& Shape() const override { return dims_; } - - Error Reset() override - { - recorded_inputs_.clear(); - return Error::Success; - } - - Error AppendRaw(const uint8_t* input, size_t input_byte_size) override - { - if (input) { - int32_t val = *reinterpret_cast(input); - recorded_inputs_.push_back(TestRecordedInput(val, input_byte_size)); - } - ++append_raw_calls_; - return Error::Success; - } - - Error SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset = 0) - { - recorded_inputs_.push_back(TestRecordedInput(name, byte_size)); - ++set_shared_memory_calls_; - return Error::Success; - } - - const std::vector dims_{}; - std::vector recorded_inputs_{}; - std::atomic append_raw_calls_{0}; - std::atomic set_shared_memory_calls_{0}; -}; - -/// Mock class of an InferResult -/// -class MockInferResult : public InferResult { - public: - MockInferResult(const InferOptions& options) : req_id_(options.request_id_) {} - - Error Id(std::string* id) const override - { - *id = req_id_; - return Error::Success; - } - Error RequestStatus() const override { return Error::Success; } - Error RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const override - { - return Error::Success; - } - - Error IsFinalResponse(bool* is_final_response) const override - { - if (is_final_response == nullptr) { - return Error("is_final_response cannot be nullptr"); - } - *is_final_response = true; - return Error::Success; - } - - Error IsNullResponse(bool* is_null_response) const override - { - if (is_null_response == nullptr) { - return Error("is_null_response cannot be nullptr"); - } - *is_null_response = false; - return Error::Success; - } - - private: - std::string req_id_; -}; - -/// Class to track statistics of MockClientBackend -/// -class MockClientStats { - public: - enum class ReqType { SYNC, ASYNC, ASYNC_STREAM }; - - struct SeqStatus { - // Set of all unique sequence IDs observed in requests - // - std::set used_seq_ids; - - // Map of all "live" sequence IDs (sequences that have started and not - // ended) to their current length (how many requests have been sent to that - // sequence ID since it started) - // - std::map live_seq_ids_to_length; - - // Map of sequence ID to how many requests have been received for it. 
- // - std::map seq_ids_to_count; - - // Map of sequence IDs to how many are "inflight" for that sequence ID - // (inflight means the request has been received, response has not been - // returned) - // - std::map seq_ids_to_inflight_count; - - // Maximum observed number of live sequences (sequences that have started - // and not ended) - // - uint32_t max_live_seq_count = 0; - - // Maximum observed number of inflight requests for a sequence - // - uint32_t max_inflight_seq_count = 0; - - std::vector seq_lengths; - - bool IsSeqLive(uint64_t seq_id) - { - return ( - live_seq_ids_to_length.find(seq_id) != live_seq_ids_to_length.end()); - } - void HandleSeqStart(uint64_t seq_id) - { - used_seq_ids.insert(seq_id); - live_seq_ids_to_length[seq_id] = 0; - if (live_seq_ids_to_length.size() > max_live_seq_count) { - max_live_seq_count = live_seq_ids_to_length.size(); - } - } - void HandleSeqEnd(uint64_t seq_id) - { - uint32_t len = live_seq_ids_to_length[seq_id]; - seq_lengths.push_back(len); - auto it = live_seq_ids_to_length.find(seq_id); - live_seq_ids_to_length.erase(it); - } - - void HandleSeqRequest(uint64_t seq_id) - { - live_seq_ids_to_length[seq_id]++; - - if (seq_ids_to_count.find(seq_id) == seq_ids_to_count.end()) { - seq_ids_to_count[seq_id] = 1; - } else { - seq_ids_to_count[seq_id]++; - } - - if (seq_ids_to_inflight_count.find(seq_id) == - seq_ids_to_inflight_count.end()) { - seq_ids_to_inflight_count[seq_id] = 1; - } else { - seq_ids_to_inflight_count[seq_id]++; - } - if (seq_ids_to_inflight_count[seq_id] > max_inflight_seq_count) { - max_inflight_seq_count = seq_ids_to_inflight_count[seq_id]; - } - } - - void Reset() - { - // Note that live_seq_ids_to_length is explicitly not reset here. - // This is because we always want to maintain the true status of - // live sequences - - used_seq_ids.clear(); - max_live_seq_count = 0; - seq_lengths.clear(); - seq_ids_to_count.clear(); - } - }; - - std::atomic num_infer_calls{0}; - std::atomic num_async_infer_calls{0}; - std::atomic num_async_stream_infer_calls{0}; - std::atomic num_start_stream_calls{0}; - - std::atomic num_active_infer_calls{0}; - - std::atomic num_append_raw_calls{0}; - std::atomic num_set_shared_memory_calls{0}; - // Struct tracking shared memory method calls - // - struct SharedMemoryStats { - std::atomic num_unregister_all_shared_memory_calls{0}; - std::atomic num_register_system_shared_memory_calls{0}; - std::atomic num_register_cuda_shared_memory_calls{0}; - std::atomic num_register_cuda_memory_calls{0}; - std::atomic num_register_system_memory_calls{0}; - std::atomic num_create_shared_memory_region_calls{0}; - std::atomic num_map_shared_memory_calls{0}; - std::atomic num_close_shared_memory_calls{0}; - std::atomic num_unlink_shared_memory_region_calls{0}; - std::atomic num_unmap_shared_memory_calls{0}; - - // bool operator==(const SharedMemoryStats& lhs, const SharedMemoryStats& - // rhs) - bool operator==(const SharedMemoryStats& rhs) const - { - if (this->num_unregister_all_shared_memory_calls == - rhs.num_unregister_all_shared_memory_calls && - this->num_register_system_shared_memory_calls == - rhs.num_register_system_shared_memory_calls && - this->num_register_cuda_shared_memory_calls == - rhs.num_register_cuda_shared_memory_calls && - this->num_register_cuda_memory_calls == - rhs.num_register_cuda_memory_calls && - this->num_register_system_memory_calls == - rhs.num_register_system_memory_calls && - this->num_create_shared_memory_region_calls == - rhs.num_create_shared_memory_region_calls && - 
this->num_map_shared_memory_calls == - rhs.num_map_shared_memory_calls && - this->num_close_shared_memory_calls == - rhs.num_close_shared_memory_calls && - this->num_unlink_shared_memory_region_calls == - rhs.num_unlink_shared_memory_region_calls && - this->num_unmap_shared_memory_calls == - rhs.num_unmap_shared_memory_calls) { - return true; - } - return false; - } - }; - - /// Determines how long the backend will delay before sending a "response". - /// If a single value vector is passed in, all responses will take that long. - /// If a list of values is passed in, then the mock backend will loop through - /// the values (and loop back to the start when it hits the end of the vector) - /// - void SetDelays(std::vector times) - { - response_delays_.clear(); - for (size_t t : times) { - response_delays_.push_back(std::chrono::milliseconds{t}); - } - } - - /// Determines the return status of requests. - /// If a single value vector is passed in, all responses will return that - /// status. If a list of values is passed in, then the mock backend will loop - /// through the values (and loop back to the start when it hits the end of the - /// vector) - /// - void SetReturnStatuses(std::vector statuses) - { - response_statuses_.clear(); - for (bool success : statuses) { - if (success) { - response_statuses_.push_back(Error::Success); - } else { - response_statuses_.push_back(Error("Injected test error")); - } - } - } - - std::chrono::milliseconds GetNextDelay() - { - std::lock_guard lock(mtx_); - - auto val = response_delays_[response_delays_index_]; - response_delays_index_++; - if (response_delays_index_ == response_delays_.size()) { - response_delays_index_ = 0; - } - return val; - } - - Error GetNextReturnStatus() - { - std::lock_guard lock(mtx_); - - auto val = response_statuses_[response_statuses_index_]; - response_statuses_index_++; - if (response_statuses_index_ == response_statuses_.size()) { - response_statuses_index_ = 0; - } - return val; - } - - bool start_stream_enable_stats_value{false}; - - std::vector> - request_timestamps; - SeqStatus sequence_status; - SharedMemoryStats memory_stats; - - // Each entry in the top vector is a list of all inputs for an inference - // request. 
If there are multiple inputs due to batching and/or the model - // having multiple inputs, all of those from the same request will be in the - // same second level vector - std::vector> recorded_inputs{}; - - void CaptureRequest( - ReqType type, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) - { - num_active_infer_calls++; - - std::lock_guard lock(mtx_); - auto time = std::chrono::system_clock::now(); - request_timestamps.push_back(time); - - // Group all values across all inputs together into a single vector, and - // then record it - std::vector request_inputs; - for (const auto& input : inputs) { - auto recorded_inputs = - static_cast(input)->recorded_inputs_; - request_inputs.insert( - request_inputs.end(), recorded_inputs.begin(), recorded_inputs.end()); - } - recorded_inputs.push_back(request_inputs); - - UpdateCallCount(type); - UpdateSeqStatus(options); - AccumulateInferInputCalls(inputs); - } - - void CaptureRequestEnd(const InferOptions& options) - { - num_active_infer_calls--; - - if (options.sequence_id_ != 0) { - sequence_status.seq_ids_to_inflight_count[options.sequence_id_]--; - } - } - - void CaptureStreamStart() - { - std::lock_guard lock(mtx_); - num_start_stream_calls++; - } - - - void Reset() - { - std::lock_guard lock(mtx_); - num_infer_calls = 0; - num_async_infer_calls = 0; - num_async_stream_infer_calls = 0; - num_start_stream_calls = 0; - request_timestamps.clear(); - sequence_status.Reset(); - } - - private: - std::vector response_delays_{ - std::chrono::milliseconds{0}}; - std::vector response_statuses_{Error::Success}; - std::atomic response_delays_index_{0}; - std::atomic response_statuses_index_{0}; - - std::mutex mtx_; - - void UpdateCallCount(ReqType type) - { - if (type == ReqType::SYNC) { - num_infer_calls++; - } else if (type == ReqType::ASYNC) { - num_async_infer_calls++; - } else { - num_async_stream_infer_calls++; - } - } - - void UpdateSeqStatus(const InferOptions& options) - { - // Seq ID of 0 is reserved for "not a sequence" - // - if (options.sequence_id_ != 0) { - // If a sequence ID is not live, it must be starting - if (!sequence_status.IsSeqLive(options.sequence_id_)) { - REQUIRE(options.sequence_start_ == true); - } - - // If a new sequence is starting, that sequence ID must not already be - // live - if (options.sequence_start_ == true) { - REQUIRE(sequence_status.IsSeqLive(options.sequence_id_) == false); - sequence_status.HandleSeqStart(options.sequence_id_); - } - - sequence_status.HandleSeqRequest(options.sequence_id_); - - // If a sequence is ending, it must be live - if (options.sequence_end_) { - REQUIRE(sequence_status.IsSeqLive(options.sequence_id_) == true); - sequence_status.HandleSeqEnd(options.sequence_id_); - } - } - } - - void AccumulateInferInputCalls(const std::vector& inputs) - { - for (const auto& input : inputs) { - const MockInferInput* mock_input = - static_cast(input); - num_append_raw_calls += mock_input->append_raw_calls_; - num_set_shared_memory_calls += mock_input->set_shared_memory_calls_; - } - } -}; - -/// Mock implementation of ClientBackend interface -/// -class NaggyMockClientBackend : public ClientBackend { - public: - NaggyMockClientBackend(std::shared_ptr stats) : stats_(stats) - { - ON_CALL(*this, AsyncStreamInfer(testing::_, testing::_, testing::_)) - .WillByDefault( - [this]( - const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) - -> Error { - stats_->CaptureRequest( - MockClientStats::ReqType::ASYNC_STREAM, options, inputs, - 
outputs); - - LaunchAsyncMockRequest(options, stream_callback_); - - return stats_->GetNextReturnStatus(); - }); - } - - MOCK_METHOD( - Error, ModelConfig, - (rapidjson::Document*, const std::string&, const std::string&), - (override)); - MOCK_METHOD( - Error, AsyncStreamInfer, - (const InferOptions&, const std::vector&, - const std::vector&), - (override)); - - Error Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override - { - stats_->CaptureRequest( - MockClientStats::ReqType::SYNC, options, inputs, outputs); - - std::this_thread::sleep_for(stats_->GetNextDelay()); - - local_completed_req_count_++; - stats_->CaptureRequestEnd(options); - - return stats_->GetNextReturnStatus(); - } - - Error AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override - { - stats_->CaptureRequest( - MockClientStats::ReqType::ASYNC, options, inputs, outputs); - - LaunchAsyncMockRequest(options, callback); - - return stats_->GetNextReturnStatus(); - } - - Error StartStream(OnCompleteFn callback, bool enable_stats) - { - stats_->CaptureStreamStart(); - stats_->start_stream_enable_stats_value = enable_stats; - stream_callback_ = callback; - return stats_->GetNextReturnStatus(); - } - - Error ClientInferStat(InferStat* infer_stat) override - { - infer_stat->completed_request_count = local_completed_req_count_; - return Error::Success; - } - - Error UnregisterAllSharedMemory() override - { - stats_->memory_stats.num_unregister_all_shared_memory_calls++; - return Error::Success; - } - - Error RegisterSystemSharedMemory( - const std::string& name, const std::string& key, - const size_t byte_size) override - { - stats_->memory_stats.num_register_system_shared_memory_calls++; - return Error::Success; - } - - Error RegisterCudaSharedMemory( - const std::string& name, const cudaIpcMemHandle_t& handle, - const size_t byte_size) override - { - stats_->memory_stats.num_register_cuda_shared_memory_calls++; - return Error::Success; - } - - Error RegisterCudaMemory( - const std::string& name, void* handle, const size_t byte_size) override - { - stats_->memory_stats.num_register_cuda_memory_calls++; - return Error::Success; - } - - Error RegisterSystemMemory( - const std::string& name, void* memory_ptr, - const size_t byte_size) override - { - stats_->memory_stats.num_register_system_memory_calls++; - return Error::Success; - } - - Error CreateSharedMemoryRegion( - std::string shm_key, size_t byte_size, int* shm_fd) override - { - stats_->memory_stats.num_create_shared_memory_region_calls++; - return Error::Success; - } - - Error MapSharedMemory( - int shm_fd, size_t offset, size_t byte_size, void** shm_addr) override - { - stats_->memory_stats.num_map_shared_memory_calls++; - return Error::Success; - } - - Error CloseSharedMemory(int shm_fd) override - { - stats_->memory_stats.num_close_shared_memory_calls++; - return Error::Success; - } - - Error UnlinkSharedMemoryRegion(std::string shm_key) override - { - stats_->memory_stats.num_unlink_shared_memory_region_calls++; - return Error::Success; - } - - Error UnmapSharedMemory(void* shm_addr, size_t byte_size) override - { - stats_->memory_stats.num_unmap_shared_memory_calls++; - return Error::Success; - } - - OnCompleteFn stream_callback_; - - private: - void LaunchAsyncMockRequest(const InferOptions options, OnCompleteFn callback) - { - std::thread([this, options, callback]() { - 
std::this_thread::sleep_for(stats_->GetNextDelay()); - local_completed_req_count_++; - - InferResult* result = new MockInferResult(options); - callback(result); - - stats_->CaptureRequestEnd(options); - }).detach(); - } - - // Total count of how many requests this client has handled and finished - size_t local_completed_req_count_ = 0; - - std::shared_ptr stats_; -}; - -using MockClientBackend = testing::NiceMock; - -/// Mock factory that always creates a MockClientBackend instead -/// of a real backend -/// -class MockClientBackendFactory : public ClientBackendFactory { - public: - MockClientBackendFactory(std::shared_ptr stats) - { - stats_ = stats; - } - - Error CreateClientBackend(std::unique_ptr* backend) override - { - std::unique_ptr mock_backend( - new MockClientBackend(stats_)); - *backend = std::move(mock_backend); - return Error::Success; - } - - private: - std::shared_ptr stats_; -}; - -}}} // namespace triton::perfanalyzer::clientbackend diff --git a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt deleted file mode 100644 index 93963e378..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
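Reviewer note: the mock backend removed above was consumed from doctest test cases through `MockClientBackendFactory`, with `MockClientStats` recording what the code under test did. A hedged sketch of that wiring, assuming the classes deleted above; the test name and the trivial expectations are invented for illustration:

```cpp
#include <memory>

#include "mock_client_backend.h"

namespace cb = triton::perfanalyzer::clientbackend;

TEST_CASE("mock backend records a synchronous infer call")
{
  auto stats = std::make_shared<cb::MockClientStats>();
  stats->SetDelays({5});            // every mock "response" takes ~5 ms
  stats->SetReturnStatuses({true});  // and reports success

  cb::MockClientBackendFactory factory(stats);
  std::unique_ptr<cb::ClientBackend> backend;
  REQUIRE(factory.CreateClientBackend(&backend).IsOk());

  cb::InferOptions options("dummy_model");
  cb::InferResult* result = nullptr;
  CHECK(backend->Infer(&result, options, {}, {}).IsOk());
  CHECK(stats->num_infer_calls.load() == 1);
}
```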
- -cmake_minimum_required (VERSION 3.18) - -set( - OPENAI_CLIENT_BACKEND_SRCS - http_client.cc - openai_client_backend.cc - openai_client.cc - openai_infer_input.cc -) - -set( - OPENAI_CLIENT_BACKEND_HDRS - http_client.h - openai_client_backend.h - openai_client.h - openai_infer_input.h -) - -add_library( - openai-client-backend-library EXCLUDE_FROM_ALL OBJECT - ${OPENAI_CLIENT_BACKEND_SRCS} - ${OPENAI_CLIENT_BACKEND_HDRS} -) - -target_link_libraries( - openai-client-backend-library - PUBLIC CURL::libcurl - PUBLIC httpclient_static -) - -if(${TRITON_ENABLE_GPU}) - target_include_directories(openai-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS}) - target_link_libraries(openai-client-backend-library PRIVATE ${CUDA_LIBRARIES}) -endif() # TRITON_ENABLE_GPU diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.cc b/src/c++/perf_analyzer/client_backend/openai/http_client.cc deleted file mode 100644 index 17fb42e08..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.cc +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
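Reviewer note: the HttpClient removed below runs all transfers on one worker thread built on libcurl's multi interface. A stripped-down sketch of that event-loop pattern (not the deleted code itself; request bookkeeping, locking, and error handling omitted):

```cpp
#include <curl/curl.h>

#include <atomic>
#include <climits>

// Perform any active transfers, reap completed ones, then block in
// curl_multi_poll() until there is socket activity or another thread calls
// curl_multi_wakeup() (e.g. after queuing a new easy handle or on shutdown).
void
TransferLoop(CURLM* multi_handle, std::atomic<bool>& exiting)
{
  int still_running = 0;
  do {
    curl_multi_perform(multi_handle, &still_running);

    int msgs_in_queue = 0;
    while (CURLMsg* msg = curl_multi_info_read(multi_handle, &msgs_in_queue)) {
      if (msg->msg == CURLMSG_DONE) {
        // A transfer finished; a real client fires its completion callback
        // here before removing and cleaning up the easy handle.
        curl_multi_remove_handle(multi_handle, msg->easy_handle);
        curl_easy_cleanup(msg->easy_handle);
      }
    }

    int numfds = 0;
    curl_multi_poll(multi_handle, nullptr, 0, INT_MAX, &numfds);
  } while (!exiting.load());
}
```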
- -#include "http_client.h" - -#include -#include - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -HttpRequest::HttpRequest( - std::function&& completion_callback, const bool verbose) - : completion_callback_(std::move(completion_callback)), verbose_(verbose) -{ -} - -HttpRequest::~HttpRequest() -{ - if (header_list_ != nullptr) { - curl_slist_free_all(header_list_); - header_list_ = nullptr; - } -} - -void -HttpRequest::AddInput(uint8_t* buf, size_t byte_size) -{ - data_buffers_.push_back(std::pair(buf, byte_size)); - total_input_byte_size_ += byte_size; -} - -void -HttpRequest::GetNextInput(uint8_t* buf, size_t size, size_t* input_bytes) -{ - *input_bytes = 0; - - while (!data_buffers_.empty() && size > 0) { - const size_t csz = std::min(data_buffers_.front().second, size); - if (csz > 0) { - const uint8_t* input_ptr = data_buffers_.front().first; - std::copy(input_ptr, input_ptr + csz, buf); - size -= csz; - buf += csz; - *input_bytes += csz; - - data_buffers_.front().first += csz; - data_buffers_.front().second -= csz; - } - if (data_buffers_.front().second == 0) { - data_buffers_.pop_front(); - } - } -} - -std::mutex HttpClient::curl_init_mtx_{}; -HttpClient::HttpClient( - const std::string& server_url, bool verbose, - const HttpSslOptions& ssl_options) - : url_(server_url), verbose_(verbose), ssl_options_(ssl_options) -{ - // [TODO TMA-1670] uncomment below and remove class-wise mutex once confirm - // curl >= 7.84.0 will always be used - // auto* ver = curl_version_info(CURLVERSION_NOW); - // if (ver->features & CURL_VERSION_THREADSAFE == 0) { - // throw std::runtime_error( - // "HTTP client has dependency on CURL library to have thread-safe " - // "support (CURL_VERSION_THREADSAFE set)"); - // } - { - std::lock_guard lk(curl_init_mtx_); - if (curl_global_init(CURL_GLOBAL_ALL) != 0) { - throw std::runtime_error("CURL global initialization failed"); - } - } - - multi_handle_ = curl_multi_init(); - - worker_ = std::thread(&HttpClient::AsyncTransfer, this); -} - -HttpClient::~HttpClient() -{ - { - std::lock_guard lock(mutex_); - exiting_ = true; - } - - curl_multi_wakeup(multi_handle_); - - // thread not joinable if AsyncInfer() is not called - // (it is default constructed thread before the first AsyncInfer() call) - if (worker_.joinable()) { - worker_.join(); - } - - curl_multi_cleanup(multi_handle_); - - { - std::lock_guard lk(curl_init_mtx_); - curl_global_cleanup(); - } -} - -const std::string& -HttpClient::ParseSslCertType(HttpSslOptions::CERTTYPE cert_type) -{ - static std::string pem_str{"PEM"}; - static std::string der_str{"DER"}; - switch (cert_type) { - case HttpSslOptions::CERTTYPE::CERT_PEM: - return pem_str; - case HttpSslOptions::CERTTYPE::CERT_DER: - return der_str; - } - throw std::runtime_error( - "Unexpected SSL certificate type encountered. Only PEM and DER are " - "supported."); -} - -const std::string& -HttpClient::ParseSslKeyType(HttpSslOptions::KEYTYPE key_type) -{ - static std::string pem_str{"PEM"}; - static std::string der_str{"DER"}; - switch (key_type) { - case HttpSslOptions::KEYTYPE::KEY_PEM: - return pem_str; - case HttpSslOptions::KEYTYPE::KEY_DER: - return der_str; - } - throw std::runtime_error( - "unsupported SSL key type encountered. 
Only PEM and DER are " - "supported."); -} - -void -HttpClient::SetSSLCurlOptions(CURL* curl_handle) -{ - curl_easy_setopt( - curl_handle, CURLOPT_SSL_VERIFYPEER, ssl_options_.verify_peer); - curl_easy_setopt( - curl_handle, CURLOPT_SSL_VERIFYHOST, ssl_options_.verify_host); - if (!ssl_options_.ca_info.empty()) { - curl_easy_setopt(curl_handle, CURLOPT_CAINFO, ssl_options_.ca_info.c_str()); - } - const auto& curl_cert_type = ParseSslCertType(ssl_options_.cert_type); - curl_easy_setopt(curl_handle, CURLOPT_SSLCERTTYPE, curl_cert_type.c_str()); - if (!ssl_options_.cert.empty()) { - curl_easy_setopt(curl_handle, CURLOPT_SSLCERT, ssl_options_.cert.c_str()); - } - const auto& curl_key_type = ParseSslKeyType(ssl_options_.key_type); - curl_easy_setopt(curl_handle, CURLOPT_SSLKEYTYPE, curl_key_type.c_str()); - if (!ssl_options_.key.empty()) { - curl_easy_setopt(curl_handle, CURLOPT_SSLKEY, ssl_options_.key.c_str()); - } -} - -void -HttpClient::Send(CURL* handle, std::unique_ptr&& request) -{ - { - std::lock_guard lock(mutex_); - - if (exiting_) { - return; - } - - auto insert_result = new_async_requests_.emplace(std::make_pair( - reinterpret_cast(handle), std::move(request))); - if (!insert_result.second) { - curl_easy_cleanup(handle); - throw std::runtime_error( - "Failed to insert new asynchronous request context."); - } - } - curl_multi_wakeup(multi_handle_); -} - -void -HttpClient::AsyncTransfer() -{ - int messages_in_queue = 0; - int still_running = 0; - int numfds = 0; - CURLMsg* msg = nullptr; - AsyncReqMap ongoing_async_requests; - - do { - { - // Check for new requests and add them to ongoing requests - - std::lock_guard lock(mutex_); - - for (auto& pair : new_async_requests_) { - curl_multi_add_handle( - multi_handle_, reinterpret_cast(pair.first)); - - ongoing_async_requests[pair.first] = std::move(pair.second); - } - new_async_requests_.clear(); - } - - CURLMcode mc = curl_multi_perform(multi_handle_, &still_running); - - if (mc != CURLM_OK) { - std::cerr << "Unexpected error: curl_multi failed. Code:" << mc - << std::endl; - continue; - } - - while ((msg = curl_multi_info_read(multi_handle_, &messages_in_queue))) { - if (msg->msg != CURLMSG_DONE) { - // Something wrong happened. 
- std::cerr << "Unexpected error: received CURLMsg=" << msg->msg - << std::endl; - continue; - } - - uintptr_t identifier = reinterpret_cast(msg->easy_handle); - auto itr = ongoing_async_requests.find(identifier); - // This shouldn't happen - if (itr == ongoing_async_requests.end()) { - std::cerr << "Unexpected error: received completed request that is not " - "in the list of asynchronous requests" - << std::endl; - curl_multi_remove_handle(multi_handle_, msg->easy_handle); - curl_easy_cleanup(msg->easy_handle); - continue; - } - - uint32_t http_code = 400; - if (msg->data.result == CURLE_OK) { - curl_easy_getinfo(msg->easy_handle, CURLINFO_RESPONSE_CODE, &http_code); - } else if (msg->data.result == CURLE_OPERATION_TIMEDOUT) { - http_code = 499; - } - - itr->second->http_code_ = http_code; - itr->second->completion_callback_(itr->second.get()); - ongoing_async_requests.erase(itr); - curl_multi_remove_handle(multi_handle_, msg->easy_handle); - curl_easy_cleanup(msg->easy_handle); - } - - - // Wait for activity on existing requests or - // explicit curl_multi_wakeup call - // - // If there are no descriptors in the multi_handle_ - // then curl_multi_poll will wait until curl_multi_wakeup - // is called - // - // curl_multi_wakeup is called when adding a new request - // or exiting - - mc = curl_multi_poll(multi_handle_, NULL, 0, INT_MAX, &numfds); - - if (mc != CURLM_OK) { - std::cerr << "Unexpected error: curl_multi failed. Code:" << mc - << std::endl; - } - - } while (!exiting_); - - for (auto& request : ongoing_async_requests) { - CURL* easy_handle = reinterpret_cast(request.first); - curl_multi_remove_handle(multi_handle_, easy_handle); - curl_easy_cleanup(easy_handle); - } - - for (auto& request : new_async_requests_) { - CURL* easy_handle = reinterpret_cast(request.first); - curl_easy_cleanup(easy_handle); - } -} - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.h b/src/c++/perf_analyzer/client_backend/openai/http_client.h deleted file mode 100644 index 7ff9bb14e..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -// The options for authorizing and authenticating SSL/TLS connections. -struct HttpSslOptions { - enum CERTTYPE { CERT_PEM = 0, CERT_DER = 1 }; - enum KEYTYPE { - KEY_PEM = 0, - KEY_DER = 1 - // TODO TMA-1645: Support loading private key from crypto engine - // KEY_ENG = 2 - }; - explicit HttpSslOptions() - : verify_peer(1), verify_host(2), cert_type(CERTTYPE::CERT_PEM), - key_type(KEYTYPE::KEY_PEM) - { - } - // This option determines whether curl verifies the authenticity of the peer's - // certificate. A value of 1 means curl verifies; 0 (zero) means it does not. - // Default value is 1. See here for more details: - // https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html - long verify_peer; - // This option determines whether libcurl verifies that the server cert is for - // the server it is known as. The default value for this option is 2 which - // means that certificate must indicate that the server is the server to which - // you meant to connect, or the connection fails. See here for more details: - // https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html - long verify_host; - // File holding one or more certificates to verify the peer with. If not - // specified, client will look for the system path where cacert bundle is - // assumed to be stored, as established at build time. See here for more - // information: https://curl.se/libcurl/c/CURLOPT_CAINFO.html - std::string ca_info; - // The format of client certificate. By default it is CERT_PEM. See here for - // more details: https://curl.se/libcurl/c/CURLOPT_SSLCERTTYPE.html - CERTTYPE cert_type; - // The file name of your client certificate. See here for more details: - // https://curl.se/libcurl/c/CURLOPT_SSLCERT.html - std::string cert; - // The format of the private key. By default it is KEY_PEM. See here for more - // details: https://curl.se/libcurl/c/CURLOPT_SSLKEYTYPE.html. - KEYTYPE key_type; - // The private key. See here for more details: - // https://curl.se/libcurl/c/CURLOPT_SSLKEY.html. - std::string key; -}; - -// HttpRequest object representing the context of a HTTP transaction. Currently -// it is also designed to be the placeholder for response data, but how the -// response is stored can be revisited later. -// 'completion_callback' doesn't transfer ownership of HttpRequest, caller must -// not keep the reference and access HttpRequest object after -// 'completion_callback' returns -class HttpRequest { - public: - HttpRequest( - std::function&& completion_callback, - const bool verbose = false); - virtual ~HttpRequest(); - - // Adds the input data to be delivered to the server, note that the HTTP - // request does not own the buffer. - void AddInput(uint8_t* buf, size_t byte_size); - - // Helper function for CURL - // Copy into 'buf' up to 'size' bytes of input data. 
Return the - // actual amount copied in 'input_bytes'. - void GetNextInput(uint8_t* buf, size_t size, size_t* input_bytes); - - // Buffer that accumulates the response body. - std::string response_buffer_; - - size_t total_input_byte_size_{0}; - - // HTTP response code for the inference request - uint32_t http_code_{200}; - - std::function completion_callback_{nullptr}; - - // Pointer to the list of the HTTP request header, keep it such that it will - // be valid during the transfer and can be freed once transfer is completed. - struct curl_slist* header_list_{nullptr}; - - protected: - const bool verbose_{false}; - - // Pointers to the input data. - std::deque> data_buffers_; -}; - -// Base class for common HTTP functionalities -class HttpClient { - public: - enum class CompressionType { NONE, DEFLATE, GZIP }; - - virtual ~HttpClient(); - - protected: - void SetSSLCurlOptions(CURL* curl_handle); - - HttpClient( - const std::string& server_url, bool verbose = false, - const HttpSslOptions& ssl_options = HttpSslOptions()); - - // Note that this function does not block - void Send(CURL* handle, std::unique_ptr&& request); - - protected: - void AsyncTransfer(); - - bool exiting_{false}; - - std::thread worker_; - std::mutex mutex_; - - // The server url - const std::string url_; - // The options for authorizing and authenticating SSL/TLS connections - HttpSslOptions ssl_options_; - - using AsyncReqMap = std::map>; - // curl multi handle for processing asynchronous requests - void* multi_handle_; - // map to record new asynchronous requests with pointer to easy handle - // or tag id as key - AsyncReqMap new_async_requests_; - - bool verbose_; - - private: - const std::string& ParseSslKeyType(HttpSslOptions::KEYTYPE key_type); - const std::string& ParseSslCertType(HttpSslOptions::CERTTYPE cert_type); - static std::mutex curl_init_mtx_; -}; -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc deleted file mode 100644 index 9b167fae1..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Include this first to make sure we are a friend of common classes. -#define TRITON_INFERENCE_SERVER_CLIENT_CLASS InferenceServerHttpClient -#include "openai_client.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common.h" - -#ifdef TRITON_ENABLE_ZLIB -#include -#endif - -extern "C" { -#include "cencode.h" -} - -#ifdef _WIN32 -#define strncasecmp(x, y, z) _strnicmp(x, y, z) -#undef min // NOMINMAX did not resolve std::min compile error -#endif //_WIN32 - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -//============================================================================== - -void -ChatCompletionRequest::SendResponse(bool is_final, bool is_null) -{ - final_response_sent_ = is_final; - response_callback_(new ChatCompletionResult( - http_code_, std::move(response_buffer_), is_final, is_null, request_id_)); -} - -ChatCompletionClient::ChatCompletionClient( - const std::string& url, const std::string& endpoint, bool verbose, - const HttpSslOptions& ssl_options) - : HttpClient(std::string(url + "/" + endpoint), verbose, ssl_options) -{ -} - -size_t -ChatCompletionClient::RequestProvider( - void* contents, size_t size, size_t nmemb, void* userp) -{ - auto request = reinterpret_cast(userp); - - size_t input_bytes = 0; - request->GetNextInput( - reinterpret_cast(contents), size * nmemb, &input_bytes); - - request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::SEND_END); - - return input_bytes; -} - -size_t -ChatCompletionClient::ResponseHeaderHandler( - void* contents, size_t size, size_t nmemb, void* userp) -{ - auto request = reinterpret_cast(userp); - - char* buf = reinterpret_cast(contents); - size_t byte_size = size * nmemb; - - std::string hdr(buf, byte_size); - std::transform(hdr.begin(), hdr.end(), hdr.begin(), [](unsigned char c) { - return std::tolower(c); - }); - if (hdr.find("content-type") != std::string::npos && - hdr.find("text/event-stream") != std::string::npos) { - request->is_stream_ = true; - } - return byte_size; -} - -size_t -ChatCompletionClient::ResponseHandler( - void* contents, size_t size, size_t nmemb, void* userp) -{ - // [TODO TMA-1666] verify if the SSE responses received are complete, or the - // response need to be stitched first. To verify, print out the received - // responses from SendResponse() to make sure the OpenAI server doesn't chunk - // the HTTP responses in the way that misaligns with the SSE responses. Reason - // of not stitching responses now is that it is a bit complicated that to make - // the write callback bulletproof is to assume the response can be chunked at - // arbitrary position, then bake in checking for SSE style (data:.*\n\n) by - // iterating all received buffer character by character. - size_t result_bytes = size * nmemb; - // return early if the response is empty as the response handling is - // triggered by the content of the response. 
- if (result_bytes == 0) { - return result_bytes; - } - - auto request = reinterpret_cast(userp); - if (request->timer_.Timestamp( - triton::client::RequestTimers::Kind::RECV_START) == 0) { - request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::RECV_START); - } - - char* buf = reinterpret_cast(contents); - request->response_buffer_.append(buf, result_bytes); - // Send response now if streaming, otherwise wait until request has been - // completed - if (request->is_stream_) { - auto done_signal = - (request->response_buffer_.find("data: [DONE]") != std::string::npos); - request->SendResponse( - done_signal /* is_final */, done_signal /* is_null */); - } - - // ResponseHandler may be called multiple times so we overwrite - // RECV_END so that we always have the time of the last. - request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::RECV_END); - - return result_bytes; -} - - -Error -ChatCompletionClient::AsyncInfer( - std::function callback, - std::string& serialized_request_body, const std::string& request_id, - const Headers& headers) -{ - if (callback == nullptr) { - return Error( - "Callback function must be provided along with AsyncInfer() call."); - } - - auto completion_callback = [this](HttpRequest* req) { - auto request = static_cast(req); - request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::REQUEST_END); - UpdateInferStat(request->timer_); - - // Send final response on request completion - // if it has not already been sent. - // (e.g. in the case of seeing [DONE] in streaming case) - if (!request->IsFinalResponseSent()) { - request->SendResponse(true /* is_final */, false /* is_null */); - } - }; - std::unique_ptr request(new ChatCompletionRequest( - std::move(completion_callback), std::move(callback), request_id, - verbose_)); - auto raw_request = static_cast(request.get()); - raw_request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::REQUEST_START); - request->AddInput( - reinterpret_cast(serialized_request_body.data()), - serialized_request_body.size()); - - CURL* multi_easy_handle = curl_easy_init(); - Error err = PreRunProcessing(multi_easy_handle, raw_request, headers); - if (!err.IsOk()) { - curl_easy_cleanup(multi_easy_handle); - return err; - } - - raw_request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::SEND_START); - Send(multi_easy_handle, std::move(request)); - return Error::Success; -} - -Error -ChatCompletionClient::PreRunProcessing( - CURL* curl, ChatCompletionRequest* request, const Headers& headers) -{ - curl_easy_setopt(curl, CURLOPT_URL, url_.c_str()); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0"); - curl_easy_setopt(curl, CURLOPT_POST, 1L); - curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L); - - if (verbose_) { - curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); - } - - const long buffer_byte_size = 16 * 1024 * 1024; - curl_easy_setopt(curl, CURLOPT_UPLOAD_BUFFERSIZE, buffer_byte_size); - curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, buffer_byte_size); - - // request data provided by RequestProvider() - curl_easy_setopt(curl, CURLOPT_READFUNCTION, RequestProvider); - curl_easy_setopt(curl, CURLOPT_READDATA, request); - - // response headers handled by ResponseHeaderHandler() - curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, ResponseHeaderHandler); - curl_easy_setopt(curl, CURLOPT_HEADERDATA, request); - - // response data handled by ResponseHandler() - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ResponseHandler); - curl_easy_setopt(curl, 
CURLOPT_WRITEDATA, request); - - const curl_off_t post_byte_size = request->total_input_byte_size_; - curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE, post_byte_size); - - SetSSLCurlOptions(curl); - - struct curl_slist* list = nullptr; - list = curl_slist_append(list, "Expect:"); - list = curl_slist_append(list, "Content-Type: application/json"); - - for (const auto& pr : headers) { - std::string hdr = pr.first + ": " + pr.second; - list = curl_slist_append(list, hdr.c_str()); - } - - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); - - // The list will be freed when the request is destructed - request->header_list_ = list; - - return Error::Success; -} - -Error -ChatCompletionClient::UpdateInferStat( - const triton::client::RequestTimers& timer) -{ - const uint64_t request_time_ns = timer.Duration( - triton::client::RequestTimers::Kind::REQUEST_START, - triton::client::RequestTimers::Kind::REQUEST_END); - const uint64_t send_time_ns = timer.Duration( - triton::client::RequestTimers::Kind::SEND_START, - triton::client::RequestTimers::Kind::SEND_END); - const uint64_t recv_time_ns = timer.Duration( - triton::client::RequestTimers::Kind::RECV_START, - triton::client::RequestTimers::Kind::RECV_END); - - if ((request_time_ns == std::numeric_limits::max()) || - (send_time_ns == std::numeric_limits::max()) || - (recv_time_ns == std::numeric_limits::max())) { - return Error( - "Timer not set correctly." + - ((timer.Timestamp(triton::client::RequestTimers::Kind::REQUEST_START) > - timer.Timestamp(triton::client::RequestTimers::Kind::REQUEST_END)) - ? (" Request time from " + - std::to_string(timer.Timestamp( - triton::client::RequestTimers::Kind::REQUEST_START)) + - " to " + - std::to_string(timer.Timestamp( - triton::client::RequestTimers::Kind::REQUEST_END)) + - ".") - : "") + - ((timer.Timestamp(triton::client::RequestTimers::Kind::SEND_START) > - timer.Timestamp(triton::client::RequestTimers::Kind::SEND_END)) - ? (" Send time from " + - std::to_string(timer.Timestamp( - triton::client::RequestTimers::Kind::SEND_START)) + - " to " + - std::to_string(timer.Timestamp( - triton::client::RequestTimers::Kind::SEND_END)) + - ".") - : "") + - ((timer.Timestamp(triton::client::RequestTimers::Kind::RECV_START) > - timer.Timestamp(triton::client::RequestTimers::Kind::RECV_END)) - ? (" Receive time from " + - std::to_string(timer.Timestamp( - triton::client::RequestTimers::Kind::RECV_START)) + - " to " + - std::to_string(timer.Timestamp( - triton::client::RequestTimers::Kind::RECV_END)) + - ".") - : "")); - } - - infer_stat_.completed_request_count++; - infer_stat_.cumulative_total_request_time_ns += request_time_ns; - infer_stat_.cumulative_send_time_ns += send_time_ns; - infer_stat_.cumulative_receive_time_ns += recv_time_ns; - - return Error::Success; -} - -//============================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_client.h deleted file mode 100644 index 00ccbd5fa..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.h +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include - -#include "../client_backend.h" -#include "common.h" -#include "http_client.h" - - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -class ChatCompletionResult : public InferResult { - public: - ChatCompletionResult( - uint32_t http_code, std::string&& serialized_response, bool is_final, - bool is_null, const std::string& request_id) - : http_code_(http_code), - serialized_response_(std::move(serialized_response)), - is_final_(is_final), is_null_(is_null), request_id_(request_id) - { - } - virtual ~ChatCompletionResult() = default; - - /// Get the id of the request which generated this response. - /// \param id Returns the request id that generated the result. - /// \return Error object indicating success or failure. - Error Id(std::string* id) const override - { - *id = request_id_; - return Error::Success; - } - - - /// Returns the status of the request. - /// \return Error object indicating the success or failure of the - /// request. - Error RequestStatus() const override - { - if ((http_code_ >= 400) && (http_code_ <= 599)) { - return Error( - "OpenAI response returns HTTP code " + std::to_string(http_code_)); - } - return Error::Success; - } - - /// Returns the raw data of the output. - /// \return Error object indicating the success or failure of the - /// request. - Error RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const override - { - // There is only a single output (and it has no defined name), so we can - // disregard output_name - *buf = reinterpret_cast(serialized_response_.c_str()); - *byte_size = serialized_response_.size(); - return Error::Success; - } - - /// Get final response bool for this response. - /// \return Error object indicating the success or failure. - Error IsFinalResponse(bool* is_final_response) const override - { - *is_final_response = is_final_; - return Error::Success; - }; - - /// Get null response bool for this response. 
- /// \return Error object indicating the success or failure. - Error IsNullResponse(bool* is_null_response) const override - { - *is_null_response = is_null_; - return Error::Success; - }; - - private: - const uint32_t http_code_{200}; - const std::string serialized_response_; - const bool is_final_{false}; - const bool is_null_{false}; - const std::string request_id_; -}; - - -class ChatCompletionRequest : public HttpRequest { - public: - virtual ~ChatCompletionRequest() {} - ChatCompletionRequest( - std::function&& completion_callback, - std::function&& response_callback, - const std::string& request_id, const bool verbose = false) - : HttpRequest(std::move(completion_callback), verbose), - response_callback_(std::move(response_callback)), - request_id_(request_id) - { - } - bool IsFinalResponseSent() { return final_response_sent_; }; - void SendResponse(bool is_final, bool is_null); - bool is_stream_{false}; - std::function response_callback_{nullptr}; - // The timers for infer request. - triton::client::RequestTimers timer_; - const std::string request_id_; - bool final_response_sent_{false}; -}; - -class ChatCompletionClient : public HttpClient { - public: - virtual ~ChatCompletionClient() = default; - - /// Create a client that can be used to communicate with the server. - /// \param server_url The inference server name, port, optional - /// scheme and optional base path in the following format: - /// host:port/. - /// \param endpoint The name of the endpoint to send requests to - /// \param verbose If true generate verbose output when contacting - /// the inference server. - /// \param ssl_options Specifies the settings for configuring - /// SSL encryption and authorization. Providing these options - /// do not ensure that SSL/TLS will be used in communication. - /// The use of SSL/TLS depends entirely on the server endpoint. - /// These options will be ignored if the server_url does not - /// expose `https://` scheme. - ChatCompletionClient( - const std::string& server_url, const std::string& endpoint, - bool verbose = false, - const HttpSslOptions& ssl_options = HttpSslOptions()); - - /// Simplified AsyncInfer() where the request body is expected to be - /// prepared by the caller, the client here is responsible to communicate - /// with a OpenAI-compatible server in both streaming and non-streaming case. - Error AsyncInfer( - std::function callback, - std::string& serialized_request_body, const std::string& request_id, - const Headers& headers); - - const InferStat& ClientInferStat() { return infer_stat_; } - - private: - // setup curl handle - Error PreRunProcessing( - CURL* curl, ChatCompletionRequest* request, const Headers& headers); - - static size_t ResponseHandler( - void* contents, size_t size, size_t nmemb, void* userp); - static size_t RequestProvider( - void* contents, size_t size, size_t nmemb, void* userp); - static size_t ResponseHeaderHandler( - void* contents, size_t size, size_t nmemb, void* userp); - - Error UpdateInferStat(const triton::client::RequestTimers& timer); - InferStat infer_stat_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc deleted file mode 100644 index 15bbbdc68..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "openai_client_backend.h" - -#include "openai_infer_input.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -//============================================================================== - -Error -OpenAiClientBackend::Create( - const std::string& url, const std::string& endpoint, - const ProtocolType protocol, std::shared_ptr http_headers, - const bool verbose, std::unique_ptr* client_backend) -{ - if (protocol == ProtocolType::GRPC) { - return Error( - "perf_analyzer does not support gRPC protocol with OpenAI endpoints"); - } - std::unique_ptr openai_client_backend( - new OpenAiClientBackend(http_headers)); - - openai_client_backend->http_client_.reset( - new ChatCompletionClient(url, endpoint, verbose)); - - *client_backend = std::move(openai_client_backend); - - return Error::Success; -} - -Error -OpenAiClientBackend::AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - if (inputs.size() != 1) { - return Error("Only expecting one input"); - } - - auto raw_input = dynamic_cast(inputs[0]); - raw_input->PrepareForRequest(); - RETURN_IF_CB_ERROR(http_client_->AsyncInfer( - callback, raw_input->GetRequestBody(), options.request_id_, - *http_headers_)); - return Error::Success; -} - - -Error -OpenAiClientBackend::ClientInferStat(InferStat* infer_stat) -{ - *infer_stat = http_client_->ClientInferStat(); - return Error::Success; -} - -//============================================================================== - -Error -OpenAiInferRequestedOutput::Create( - InferRequestedOutput** infer_output, const std::string& name, - const std::string& datatype) -{ - OpenAiInferRequestedOutput* local_infer_output = - new OpenAiInferRequestedOutput(name, datatype); - - tc::InferRequestedOutput* openai_infer_output; - RETURN_IF_TRITON_ERROR(tc::InferRequestedOutput::Create( - &openai_infer_output, name, 0, datatype)); - local_infer_output->output_.reset(openai_infer_output); - - *infer_output = 
local_infer_output; - - return Error::Success; -} - -OpenAiInferRequestedOutput::OpenAiInferRequestedOutput( - const std::string& name, const std::string& datatype) - : InferRequestedOutput(BackendKind::OPENAI, name, datatype) -{ -} - -//============================================================================== - - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h deleted file mode 100644 index 2d475eacf..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../../perf_utils.h" -#include "../client_backend.h" -#include "openai_client.h" -#include "openai_infer_input.h" - -#define RETURN_IF_TRITON_ERROR(S) \ - do { \ - const tc::Error& status__ = (S); \ - if (!status__.IsOk()) { \ - return Error(status__.Message()); \ - } \ - } while (false) - -namespace tc = triton::client; -namespace cb = triton::perfanalyzer::clientbackend; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - - -//============================================================================== -/// OpenAiClientBackend is used to generate load on the serving instance, -/// which supports OpenAI Chat Completions API -/// -class OpenAiClientBackend : public ClientBackend { - public: - /// Create an OpenAI client backend which can be used to interact with the - /// server. - /// \param url The inference server url and port. - /// \param endpoint The endpoint on the inference server to send requests to - /// \param protocol The protocol type used. - /// \param http_headers Map of HTTP headers. The map key/value indicates - /// the header name/value. - /// \param verbose Enables the verbose mode. - /// \param client_backend Returns a new OpenAiClientBackend - /// object. 
- /// \return Error object indicating success or failure. - static Error Create( - const std::string& url, const std::string& endpoint, - const ProtocolType protocol, std::shared_ptr http_headers, - const bool verbose, std::unique_ptr* client_backend); - - /// See ClientBackend::AsyncInfer() - Error AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::ClientInferStat() - Error ClientInferStat(InferStat* infer_stat) override; - - private: - OpenAiClientBackend(std::shared_ptr http_headers) - : ClientBackend(BackendKind::OPENAI), http_headers_(http_headers) - { - } - - std::unique_ptr http_client_; - std::shared_ptr http_headers_; -}; - -//============================================================== -/// OpenAiInferRequestedOutput is a wrapper around -/// InferRequestedOutput object of triton common client library. -/// -class OpenAiInferRequestedOutput : public InferRequestedOutput { - public: - static Error Create( - InferRequestedOutput** infer_output, const std::string& name, - const std::string& datatype); - /// Returns the raw InferRequestedOutput object required by OpenAi client - /// library. - tc::InferRequestedOutput* Get() const { return output_.get(); } - - private: - explicit OpenAiInferRequestedOutput( - const std::string& name, const std::string& datatype); - - std::unique_ptr output_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc deleted file mode 100644 index dcf213fc2..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
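[Aside: the backend's AsyncInfer() above expects exactly one input, and the OpenAiInferInput implementation below simply concatenates the registered buffers into the HTTP request body. That suggests the following usage pattern; this is a sketch under stated assumptions (the include path, the int64_t element type of the dims vector, and the "payload"/"BYTES" labels are illustrative, not taken from the removed files):]

#include <cstdint>
#include <string>
#include <vector>

#include "openai_infer_input.h"  // assumed path within the removed backend

namespace cb = triton::perfanalyzer::clientbackend;

// Sketch: register an already-serialized chat-completions JSON body as the
// single raw input the OpenAI backend consumes.
static cb::Error
BuildChatCompletionInput(
    const std::string& serialized_json, cb::InferInput** input)
{
  // Illustrative name/shape/datatype: the backend only uses the raw bytes
  // registered below as the POST body, not any tensor semantics.
  std::vector<int64_t> dims{1};
  cb::Error err = cb::openai::OpenAiInferInput::Create(
      input, "payload", dims, "BYTES");
  if (!err.IsOk()) {
    return err;
  }
  // AppendRaw() stores the pointer, so serialized_json must stay alive until
  // the backend copies it in PrepareForRequest().
  return (*input)->AppendRaw(
      reinterpret_cast<const uint8_t*>(serialized_json.data()),
      serialized_json.size());
}

[The backend then forwards GetRequestBody() verbatim as the POST body, which is why no tensor interpretation is attached to this input.]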
- -#include "openai_infer_input.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -Error -OpenAiInferInput::Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype) -{ - OpenAiInferInput* local_infer_input = - new OpenAiInferInput(name, dims, datatype); - - *infer_input = local_infer_input; - return Error::Success; -} - -Error -OpenAiInferInput::SetShape(const std::vector& shape) -{ - shape_ = shape; - return Error::Success; -} - -Error -OpenAiInferInput::Reset() -{ - data_str_.clear(); - - bufs_.clear(); - buf_byte_sizes_.clear(); - byte_size_ = 0; - return Error::Success; -} - -Error -OpenAiInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) -{ - data_str_.clear(); - - byte_size_ += input_byte_size; - - bufs_.push_back(input); - buf_byte_sizes_.push_back(input_byte_size); - return Error::Success; -} - -Error -OpenAiInferInput::RawData(const uint8_t** buf, size_t* byte_size) -{ - // TMA-1775 - handle multi-batch case - *buf = bufs_[0]; - *byte_size = buf_byte_sizes_[0]; - return Error::Success; -} - -Error -OpenAiInferInput::PrepareForRequest() -{ - // Reset position so request sends entire input. - if (data_str_.empty() && (byte_size_ != 0)) { - for (size_t i = 0; i < bufs_.size(); ++i) { - data_str_.append( - reinterpret_cast(bufs_[i]), buf_byte_sizes_[i]); - } - } - return Error::Success; -} - -OpenAiInferInput::OpenAiInferInput( - const std::string& name, const std::vector& dims, - const std::string& datatype) - : InferInput(BackendKind::OPENAI, name, datatype), shape_(dims) -{ -} - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h deleted file mode 100644 index 93a12b519..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../../perf_utils.h" -#include "../client_backend.h" - - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -//============================================================== -/// OpenAiInferInput instance holds the information regarding -/// model input tensors and their corresponding generated data. -/// -class OpenAiInferInput : public InferInput { - public: - static Error Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype); - /// See InferInput::Shape() - const std::vector& Shape() const override { return shape_; } - /// See InferInput::SetShape() - Error SetShape(const std::vector& shape) override; - /// See InferInput::Reset() - Error Reset() override; - /// See InferInput::AppendRaw() - Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; - /// See InferInput::RawData() - Error RawData(const uint8_t** buf, size_t* byte_size) override; - /// Prepare the input to be in the form expected by an OpenAI client, - /// must call before accessing the data. - Error PrepareForRequest(); - /// Get the contiguous request body string - std::string& GetRequestBody() { return data_str_; } - - private: - explicit OpenAiInferInput( - const std::string& name, const std::vector& dims, - const std::string& datatype); - - std::vector shape_; - size_t byte_size_{0}; - - std::vector bufs_; - std::vector buf_byte_sizes_; - std::string data_str_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/tensorflow_serving/CMakeLists.txt deleted file mode 100644 index ba1c2fa40..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/CMakeLists.txt +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -cmake_minimum_required (VERSION 3.18) - -FetchContent_Declare(tensorflow-serving-repo - PREFIX tensorflow-serving-rep -) -FetchContent_GetProperties(tensorflow-serving-repo) -if(NOT tensorflow-serving-repo_POPULATED) - FetchContent_Populate(tensorflow-serving-repo - GIT_REPOSITORY "https://github.com/tensorflow/serving.git" - GIT_TAG "2.3.0" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-serving-repo/src/tensorflow_serving" -) -endif() - -FetchContent_Declare(tensorflow-repo - PREFIX tensorflow-repo - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-repo/src/tensorflow" -) -FetchContent_GetProperties(tensorflow-repo) -if(NOT tensorflow-repo_POPULATED) - FetchContent_Populate(tensorflow-repo - GIT_REPOSITORY "https://github.com/tensorflow/tensorflow.git" - GIT_TAG "v2.3.0" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-repo/src/tensorflow" -) -endif() - - -set(TENSORFLOW_PATH ${CMAKE_CURRENT_BINARY_DIR}/tensorflow-repo/src/tensorflow) -set(TFSERVE_PATH ${CMAKE_CURRENT_BINARY_DIR}/tensorflow-serving-repo/src/tensorflow_serving) - -# Copy the repos to a proto staging area. -file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/protos) -execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${TENSORFLOW_PATH}/tensorflow - ${CMAKE_BINARY_DIR}/protos/tensorflow) -execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${TFSERVE_PATH}/tensorflow_serving - ${CMAKE_BINARY_DIR}/protos/tensorflow_serving) - -# Protobuf compiler dependency. -include(CompileProto.cmake) - -# Protobuf sources of the TensorFlow Serving to be compiled without a gRPC plugin. -file(GLOB_RECURSE TFSERVING_PROTOS ${CMAKE_BINARY_DIR}/protos/tensorflow_serving/*.proto) -file(GLOB TF_EXAMPLE_PROTOS ${CMAKE_BINARY_DIR}/protos/tensorflow/core/example/*.proto) -file(GLOB TF_FW_PROTOS ${CMAKE_BINARY_DIR}/protos/tensorflow/core/framework/*.proto) -file(GLOB TF_PROTOBUF_PROTOS ${CMAKE_BINARY_DIR}/protos/tensorflow/core/protobuf/*.proto) - -# This is a dirty hack to prevent unnecessary leaking dependency -list(FILTER TF_PROTOBUF_PROTOS EXCLUDE REGEX "autotuning.proto$|conv_autotuning.proto$") - -# Compiling CPP sources from proto files. -compile_proto(0 "${CMAKE_BINARY_DIR}/protos" "${CMAKE_CURRENT_BINARY_DIR}/compiled" PB_SOURCES PB_HEADERS - ${TFSERVING_PROTOS} ${TF_EXAMPLE_PROTOS} ${TF_FW_PROTOS} ${TF_PROTOBUF_PROTOS}) - -# Compiling CPP sources with gRPC plugin. 
-compile_proto(1 "${CMAKE_BINARY_DIR}/protos" "${CMAKE_CURRENT_BINARY_DIR}/compiled" PB_GRPC_SOURCES PB_GRPC_HEADERS - ${CMAKE_BINARY_DIR}/protos/tensorflow_serving/apis/prediction_service.proto) - -set( - TFS_CLIENT_BACKEND_SRCS - tfserve_client_backend.cc - tfserve_infer_input.cc - tfserve_grpc_client.cc - ${PB_SOURCES} - ${PB_GRPC_SOURCES} -) - -set( - TFS_CLIENT_BACKEND_HDRS - tfserve_client_backend.h - tfserve_infer_input.h - tfserve_grpc_client.h - ${PB_HEADERS} - ${PB_GRPC_HEADERS} -) - -add_library( - tfs-client-backend-library EXCLUDE_FROM_ALL OBJECT - ${TFS_CLIENT_BACKEND_SRCS} - ${TFS_CLIENT_BACKEND_HDRS} -) - -target_include_directories(tfs-client-backend-library PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/compiled) - -target_link_libraries( - tfs-client-backend-library - PUBLIC gRPC::grpc++ - PUBLIC gRPC::grpc - PUBLIC protobuf::libprotobuf - PUBLIC grpcclient_static -) - -if(${TRITON_ENABLE_GPU}) - target_include_directories(tfs-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS}) - target_link_libraries(tfs-client-backend-library PRIVATE ${CUDA_LIBRARIES}) -endif() # TRITON_ENABLE_GPU diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/CompileProto.cmake b/src/c++/perf_analyzer/client_backend/tensorflow_serving/CompileProto.cmake deleted file mode 100644 index 79de28e4e..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/CompileProto.cmake +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# A function that creates CPP sources from proto files. -function(COMPILE_PROTO USE_GRPC PROTO_PATH OUT_PATH SRCS HDRS) - - # Checking args. - if(NOT ARGN) - message(SEND_ERROR "Error: COMPILE_PROTO() called without any proto files") - return() - endif() - - # To collect paths to created sources and headers. - set(${SRCS}) - set(${HDRS}) - - - - # Getting actual absolute paths to all protos location and output directory. 
- get_filename_component(ABS_PROTO_PATH "${PROTO_PATH}" ABSOLUTE) - get_filename_component(ABS_OUT_PATH "${OUT_PATH}" ABSOLUTE) - - # Launching sources generation for all proto files. - foreach(FIL ${ARGN}) - - # Getting the absolute path and filename without extension for the current proto file. - get_filename_component(ABS_FIL "${FIL}" ABSOLUTE) - get_filename_component(FIL_WE "${FIL}" NAME_WE) - - # Getting the relative dir of the proto file (relative to the protos root dir). - file(RELATIVE_PATH REL_FIL_TO_PROTO "${ABS_PROTO_PATH}" "${ABS_FIL}") - get_filename_component(REL_DIR_TO_PROTO "${REL_FIL_TO_PROTO}" DIRECTORY) - - # Preparing a path to label created sources from proto. - set(COMPILED_NAME_TEMPLATE "${ABS_OUT_PATH}/${REL_DIR_TO_PROTO}/${FIL_WE}") - - - - # Firing sources generation command with gRPC application. - if(${USE_GRPC}) - set(_GRPC_CPP_PLUGIN_EXECUTABLE $) - - # Marking created files for CMake. - list(APPEND ${SRCS} "${COMPILED_NAME_TEMPLATE}.grpc.pb.cc") - list(APPEND ${HDRS} "${COMPILED_NAME_TEMPLATE}.grpc.pb.h") - - # Launching proto compilation command. - add_custom_command( - COMMAND ${CMAKE_COMMAND} -E make_directory "${ABS_OUT_PATH}" - OUTPUT - "${COMPILED_NAME_TEMPLATE}.grpc.pb.cc" - "${COMPILED_NAME_TEMPLATE}.grpc.pb.h" - COMMAND - ${Protobuf_PROTOC_EXECUTABLE} - ARGS - --grpc_out=${ABS_OUT_PATH} - --plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE} - --proto_path=${ABS_PROTO_PATH} - ${ABS_FIL} - DEPENDS - ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} - COMMENT - "Running gRPC C++ protocol buffer compiler on ${FIL}" - VERBATIM) - - # Without gRPC. - else() - list(APPEND ${SRCS} "${COMPILED_NAME_TEMPLATE}.pb.cc") - list(APPEND ${HDRS} "${COMPILED_NAME_TEMPLATE}.pb.h") - add_custom_command( - COMMAND ${CMAKE_COMMAND} -E make_directory "${ABS_OUT_PATH}" - OUTPUT - "${COMPILED_NAME_TEMPLATE}.pb.cc" - "${COMPILED_NAME_TEMPLATE}.pb.h" - COMMAND - ${Protobuf_PROTOC_EXECUTABLE} - ARGS - --cpp_out=${ABS_OUT_PATH} - --proto_path=${ABS_PROTO_PATH} - ${ABS_FIL} - DEPENDS - ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} - COMMENT - "Running C++ protocol buffer compiler on ${FIL}" - VERBATIM) - endif() - endforeach() - - # Returning generated sources list. - set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) - set(${SRCS} ${${SRCS}} PARENT_SCOPE) - set(${HDRS} ${${HDRS}} PARENT_SCOPE) -endfunction() diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_client_backend.cc b/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_client_backend.cc deleted file mode 100644 index 1fde3e5a8..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_client_backend.cc +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "tfserve_client_backend.h" - -#include "json_utils.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tfserving { - -//============================================================================== - -Error -TFServeClientBackend::Create( - const std::string& url, const ProtocolType protocol, - const grpc_compression_algorithm compression_algorithm, - std::shared_ptr http_headers, const bool verbose, - std::unique_ptr* client_backend) -{ - if (protocol == ProtocolType::HTTP) { - return Error( - "perf_analyzer does not support http protocol with TF serving"); - } - std::unique_ptr tfserve_client_backend( - new TFServeClientBackend(compression_algorithm, http_headers)); - - RETURN_IF_CB_ERROR(GrpcClient::Create( - &(tfserve_client_backend->grpc_client_), url, verbose)); - - *client_backend = std::move(tfserve_client_backend); - - return Error::Success; -} - -Error -TFServeClientBackend::ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version) -{ - tensorflow::serving::GetModelMetadataResponse metadata_proto; - RETURN_IF_CB_ERROR(grpc_client_->ModelMetadata( - &metadata_proto, model_name, model_version, *http_headers_)); - - std::string metadata; - ::google::protobuf::util::JsonPrintOptions options; - options.preserve_proto_field_names = true; - options.always_print_primitive_fields = true; - ::google::protobuf::util::MessageToJsonString( - metadata_proto, &metadata, options); - - RETURN_IF_TRITON_ERROR(tc::ParseJson(model_metadata, metadata)); - - return Error::Success; -} - -Error -TFServeClientBackend::Infer( - cb::InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - tfs::InferResult* tfserve_result; - RETURN_IF_CB_ERROR(grpc_client_->Infer( - &tfserve_result, options, inputs, outputs, *http_headers_, - compression_algorithm_)); - - *result = new TFServeInferResult(tfserve_result); - - return Error::Success; -} - -Error -TFServeClientBackend::AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - auto wrapped_callback = [callback](tfs::InferResult* client_result) { - cb::InferResult* result = new TFServeInferResult(client_result); - callback(result); - }; - - RETURN_IF_CB_ERROR(grpc_client_->AsyncInfer( - wrapped_callback, options, inputs, outputs, *http_headers_, - compression_algorithm_)); - - return Error::Success; -} - - -Error -TFServeClientBackend::ClientInferStat(InferStat* infer_stat) -{ - // Reusing the common library utilities to collect and report the - // client side statistics. 
- tc::InferStat client_infer_stat; - - RETURN_IF_TRITON_ERROR(grpc_client_->ClientInferStat(&client_infer_stat)); - - ParseInferStat(client_infer_stat, infer_stat); - - return Error::Success; -} - -void -TFServeClientBackend::ParseInferStat( - const tc::InferStat& tfserve_infer_stat, InferStat* infer_stat) -{ - infer_stat->completed_request_count = - tfserve_infer_stat.completed_request_count; - infer_stat->cumulative_total_request_time_ns = - tfserve_infer_stat.cumulative_total_request_time_ns; - infer_stat->cumulative_send_time_ns = - tfserve_infer_stat.cumulative_send_time_ns; - infer_stat->cumulative_receive_time_ns = - tfserve_infer_stat.cumulative_receive_time_ns; -} - -//============================================================================== - -Error -TFServeInferRequestedOutput::Create( - InferRequestedOutput** infer_output, const std::string& name) -{ - TFServeInferRequestedOutput* local_infer_output = - new TFServeInferRequestedOutput(name); - - tc::InferRequestedOutput* tfserve_infer_output; - RETURN_IF_TRITON_ERROR( - tc::InferRequestedOutput::Create(&tfserve_infer_output, name)); - local_infer_output->output_.reset(tfserve_infer_output); - - *infer_output = local_infer_output; - - return Error::Success; -} - -TFServeInferRequestedOutput::TFServeInferRequestedOutput( - const std::string& name) - : InferRequestedOutput(BackendKind::TENSORFLOW_SERVING, name) -{ -} - -//============================================================================== - -TFServeInferResult::TFServeInferResult(tfs::InferResult* result) -{ - result_.reset(result); -} - -Error -TFServeInferResult::Id(std::string* id) const -{ - id->clear(); - return Error::Success; -} - -Error -TFServeInferResult::RequestStatus() const -{ - RETURN_IF_CB_ERROR(result_->RequestStatus()); - return Error::Success; -} - -Error -TFServeInferResult::RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const -{ - return Error( - "Output retrieval is not currently supported for TFS client backend"); -} - -//============================================================================== - - -}}}} // namespace triton::perfanalyzer::clientbackend::tfserving diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_client_backend.h b/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_client_backend.h deleted file mode 100644 index bd6b5db8b..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_client_backend.h +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../../perf_utils.h" -#include "../client_backend.h" -#include "tfserve_grpc_client.h" - -#define RETURN_IF_TRITON_ERROR(S) \ - do { \ - const tc::Error& status__ = (S); \ - if (!status__.IsOk()) { \ - return Error(status__.Message()); \ - } \ - } while (false) - -namespace tc = triton::client; -namespace cb = triton::perfanalyzer::clientbackend; -namespace tfs = triton::perfanalyzer::clientbackend::tfserving; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tfserving { - - -//============================================================================== -/// TFServeClientBackend is used to generate load on the TF serving instance -/// -class TFServeClientBackend : public ClientBackend { - public: - /// Create a TFserving client backend which can be used to interact with the - /// server. - /// \param url The inference server url and port. - /// \param protocol The protocol type used. - /// \param compression_algorithm The compression algorithm to be used - /// on the grpc requests. - /// \param http_headers Map of HTTP headers. The map key/value indicates - /// the header name/value. - /// \param verbose Enables the verbose mode. - /// \param client_backend Returns a new TFServeClientBackend - /// object. - /// \return Error object indicating success or failure. 
- static Error Create( - const std::string& url, const ProtocolType protocol, - const grpc_compression_algorithm compression_algorithm, - std::shared_ptr http_headers, const bool verbose, - std::unique_ptr* client_backend); - - /// See ClientBackend::ModelMetadata() - Error ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version) override; - - /// See ClientBackend::Infer() - Error Infer( - cb::InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::AsyncInfer() - Error AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::ClientInferStat() - Error ClientInferStat(InferStat* infer_stat) override; - - private: - TFServeClientBackend( - const grpc_compression_algorithm compression_algorithm, - std::shared_ptr http_headers) - : ClientBackend(BackendKind::TENSORFLOW_SERVING), - compression_algorithm_(compression_algorithm), - http_headers_(http_headers) - { - } - - void ParseInferStat( - const tc::InferStat& tfserve_infer_stat, InferStat* infer_stat); - - std::unique_ptr grpc_client_; - - grpc_compression_algorithm compression_algorithm_; - std::shared_ptr http_headers_; -}; - -//============================================================== -/// TFServeInferRequestedOutput is a wrapper around -/// InferRequestedOutput object of triton common client library. -/// -class TFServeInferRequestedOutput : public InferRequestedOutput { - public: - static Error Create( - InferRequestedOutput** infer_output, const std::string& name); - /// Returns the raw InferRequestedOutput object required by TFserving client - /// library. - tc::InferRequestedOutput* Get() const { return output_.get(); } - - private: - explicit TFServeInferRequestedOutput(const std::string& name); - - std::unique_ptr output_; -}; - -//============================================================== -/// TFServeInferResult is a wrapper around InferResult object of -/// TF serving InferResult object. -/// -class TFServeInferResult : public cb::InferResult { - public: - explicit TFServeInferResult(tfs::InferResult* result); - /// See InferResult::Id() - Error Id(std::string* id) const override; - /// See InferResult::RequestStatus() - Error RequestStatus() const override; - /// See InferResult::RawData() - Error RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const override; - - private: - std::unique_ptr result_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::tfserving diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_grpc_client.cc b/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_grpc_client.cc deleted file mode 100644 index f53e4d179..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_grpc_client.cc +++ /dev/null @@ -1,729 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "tfserve_grpc_client.h" - -#include -#include -#include -#include -#include -#include - -#include "tfserve_client_backend.h" - -/// Type alias for string-TensorProto map. -typedef google::protobuf::Map - StringKeyedProtos; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tfserving { - -namespace { - -// Use map to keep track of GRPC channels. : -// If context is created on url that has established Channel, then reuse it. -std::map> grpc_channel_map_; -std::mutex grpc_channel_map_mtx_; - -void -GetTensorFlowDataType(const std::string& datatype, tensorflow::DataType* dtype) -{ - if (datatype == "FP16") { - *dtype = tensorflow::DataType::DT_HALF; - } else if (datatype == "BF16") { - *dtype = tensorflow::DataType::DT_BFLOAT16; - } else if (datatype == "FP32") { - *dtype = tensorflow::DataType::DT_FLOAT; - } else if (datatype == "FP64") { - *dtype = tensorflow::DataType::DT_DOUBLE; - } else if (datatype == "INT32") { - *dtype = tensorflow::DataType::DT_INT32; - } else if (datatype == "INT16") { - *dtype = tensorflow::DataType::DT_INT16; - } else if (datatype == "UINT16") { - *dtype = tensorflow::DataType::DT_UINT16; - } else if (datatype == "INT8") { - *dtype = tensorflow::DataType::DT_INT8; - } else if (datatype == "UINT8") { - *dtype = tensorflow::DataType::DT_UINT8; - } else if (datatype == "BYTES") { - *dtype = tensorflow::DataType::DT_STRING; - } else if (datatype == "INT64") { - *dtype = tensorflow::DataType::DT_INT64; - } else if (datatype == "BOOL") { - *dtype = tensorflow::DataType::DT_BOOL; - } else if (datatype == "UINT32") { - *dtype = tensorflow::DataType::DT_UINT32; - } else if (datatype == "UINT64") { - *dtype = tensorflow::DataType::DT_UINT64; - } else { - *dtype = tensorflow::DT_INVALID; - } -} - -void -ReadFile(const std::string& filename, std::string& data) -{ - data.clear(); - if (!filename.empty()) { - std::ifstream file(filename.c_str(), std::ios::in); - if (file.is_open()) { - std::stringstream ss; - ss << file.rdbuf(); - file.close(); - data = ss.str(); - } - } -} - -std::shared_ptr -GetChannel(const std::string& url, bool use_ssl, const SslOptions& ssl_options) -{ - std::lock_guard lock(grpc_channel_map_mtx_); - - const auto& channel_itr = grpc_channel_map_.find(url); - if (channel_itr != grpc_channel_map_.end()) { - return 
channel_itr->second; - } else { - grpc::ChannelArguments arguments; - arguments.SetMaxSendMessageSize(tc::MAX_GRPC_MESSAGE_SIZE); - arguments.SetMaxReceiveMessageSize(tc::MAX_GRPC_MESSAGE_SIZE); - std::shared_ptr credentials; - if (use_ssl) { - std::string root; - std::string key; - std::string cert; - ReadFile(ssl_options.root_certificates, root); - ReadFile(ssl_options.private_key, key); - ReadFile(ssl_options.certificate_chain, cert); - grpc::SslCredentialsOptions opts = {root, key, cert}; - credentials = grpc::SslCredentials(opts); - } else { - credentials = grpc::InsecureChannelCredentials(); - } - std::shared_ptr channel = - grpc::CreateCustomChannel(url, credentials, arguments); - grpc_channel_map_.insert(std::make_pair(url, channel)); - return channel; - } -} - -} // namespace - -//============================================================================== -// An GrpcInferRequest represents an inflght inference request on gRPC. -// -class GrpcInferRequest { - public: - GrpcInferRequest(TFServeOnCompleteFn callback = nullptr) - : callback_(callback), grpc_status_(), - grpc_response_(std::make_shared()) - { - } - - tc::RequestTimers& Timer() { return timer_; } - friend GrpcClient; - - private: - TFServeOnCompleteFn callback_; - // Variables for GRPC call - grpc::ClientContext grpc_context_; - grpc::Status grpc_status_; - std::shared_ptr grpc_response_; - // The timers for infer request. - tc::RequestTimers timer_; -}; - -//============================================================================== - -Error -GrpcClient::Create( - std::unique_ptr* client, const std::string& server_url, - bool verbose, bool use_ssl, const SslOptions& ssl_options) -{ - client->reset(new GrpcClient(server_url, verbose, use_ssl, ssl_options)); - return Error::Success; -} - -Error -GrpcClient::ModelMetadata( - tensorflow::serving::GetModelMetadataResponse* model_metadata, - const std::string& model_name, const std::string& model_version, - const Headers& headers) -{ - model_metadata->Clear(); - Error err; - - tensorflow::serving::GetModelMetadataRequest request; - grpc::ClientContext context; - - for (const auto& it : headers) { - context.AddMetadata(it.first, it.second); - } - - request.mutable_model_spec()->set_name(model_name); - if (!model_version.empty()) { - request.mutable_model_spec()->set_version_label(model_version); - } - request.add_metadata_field("signature_def"); - grpc::Status grpc_status = - stub_->GetModelMetadata(&context, request, model_metadata); - if (grpc_status.ok()) { - if (verbose_) { - std::cout << model_metadata->DebugString() << std::endl; - } - } else { - err = Error(grpc_status.error_message()); - } - - return err; -} - -Error -GrpcClient::Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs, - const Headers& headers, - const grpc_compression_algorithm compression_algorithm) -{ - Error err; - - grpc::ClientContext context; - - std::shared_ptr sync_request(new GrpcInferRequest()); - - sync_request->Timer().Reset(); - sync_request->Timer().CaptureTimestamp( - tc::RequestTimers::Kind::REQUEST_START); - // Use send timer to measure time for marshalling infer request - sync_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::SEND_START); - for (const auto& it : headers) { - context.AddMetadata(it.first, it.second); - } - context.set_compression_algorithm(compression_algorithm); - - err = PreRunProcessing(options, inputs, outputs); - sync_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::SEND_END); 
- if (!err.IsOk()) { - return err; - } - sync_request->grpc_response_->Clear(); - sync_request->grpc_status_ = stub_->Predict( - &context, infer_request_, sync_request->grpc_response_.get()); - - if (!sync_request->grpc_status_.ok()) { - err = Error(sync_request->grpc_status_.error_message()); - } - - sync_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::RECV_START); - InferResult::Create(result, sync_request->grpc_response_, err); - sync_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::RECV_END); - - sync_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::REQUEST_END); - - tc::Error update_err = UpdateInferStat(sync_request->Timer()); - if (!update_err.IsOk()) { - std::cerr << "Failed to update context stat: " << update_err << std::endl; - } - - if (sync_request->grpc_status_.ok()) { - if (verbose_) { - std::cout << sync_request->grpc_response_->DebugString() << std::endl; - } - } - - return (*result)->RequestStatus(); -} - -Error -GrpcClient::AsyncInfer( - TFServeOnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs, - const Headers& headers, - const grpc_compression_algorithm compression_algorithm) -{ - if (callback == nullptr) { - return Error( - "Callback function must be provided along with AsyncInfer() call."); - } - if (!worker_.joinable()) { - worker_ = std::thread(&GrpcClient::AsyncTransfer, this); - } - - GrpcInferRequest* async_request; - async_request = new GrpcInferRequest(std::move(callback)); - - async_request->Timer().CaptureTimestamp( - tc::RequestTimers::Kind::REQUEST_START); - async_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::SEND_START); - for (const auto& it : headers) { - async_request->grpc_context_.AddMetadata(it.first, it.second); - } - async_request->grpc_context_.set_compression_algorithm(compression_algorithm); - - Error err = PreRunProcessing(options, inputs, outputs); - if (!err.IsOk()) { - delete async_request; - return err; - } - - async_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::SEND_END); - - std::unique_ptr< - grpc::ClientAsyncResponseReader> - rpc(stub_->PrepareAsyncPredict( - &async_request->grpc_context_, infer_request_, - &async_request_completion_queue_)); - - rpc->StartCall(); - - rpc->Finish( - async_request->grpc_response_.get(), &async_request->grpc_status_, - (void*)async_request); - - if (verbose_) { - std::cout << "Sent request"; - if (options.request_id_.size() != 0) { - std::cout << " '" << options.request_id_ << "'"; - } - std::cout << std::endl; - } - - return Error::Success; -} - -void -GrpcClient::AsyncTransfer() -{ - while (!exiting_) { - // GRPC async APIs are thread-safe https://github.com/grpc/grpc/issues/4486 - GrpcInferRequest* raw_async_request; - bool ok = true; - bool status = - async_request_completion_queue_.Next((void**)(&raw_async_request), &ok); - std::shared_ptr async_request; - if (!ok) { - fprintf(stderr, "Unexpected not ok on client side.\n"); - } - if (!status) { - if (!exiting_) { - fprintf(stderr, "Completion queue is closed.\n"); - } - } else if (raw_async_request == nullptr) { - fprintf(stderr, "Unexpected null tag received at client.\n"); - } else { - async_request.reset(raw_async_request); - InferResult* async_result; - Error err; - if (!async_request->grpc_status_.ok()) { - err = Error(async_request->grpc_status_.error_message()); - } - async_request->Timer().CaptureTimestamp( - tc::RequestTimers::Kind::RECV_START); - InferResult::Create(&async_result, async_request->grpc_response_, err); - 
async_request->Timer().CaptureTimestamp( - tc::RequestTimers::Kind::RECV_END); - async_request->Timer().CaptureTimestamp( - tc::RequestTimers::Kind::REQUEST_END); - tc::Error update_err = UpdateInferStat(async_request->Timer()); - if (!update_err.IsOk()) { - std::cerr << "Failed to update context stat: " << update_err - << std::endl; - } - if (async_request->grpc_status_.ok()) { - if (verbose_) { - std::cout << async_request->grpc_response_->DebugString() - << std::endl; - } - } - async_request->callback_(async_result); - } - } -} - -Error -GrpcClient::PreRunProcessing( - const InferOptions& options, const std::vector& inputs, - const std::vector& outputs) -{ - // Populate the request protobuf - - // Describing model name and signature from remote server. - infer_request_.mutable_model_spec()->set_name(options.model_name_); - if (!options.model_version_.empty()) { - infer_request_.mutable_model_spec()->set_version_label( - options.model_version_); - } - if (!options.model_signature_name_.empty()) { - infer_request_.mutable_model_spec()->set_signature_name( - options.model_signature_name_); - } - - // Describing remote model inputs shape. - StringKeyedProtos& keyed_proto_inputs = *infer_request_.mutable_inputs(); - std::set request_inputs; - - for (const auto input : inputs) { - auto raw_input = dynamic_cast(input); - request_inputs.insert(raw_input->Name()); - // Add new TensorProto submessages only if required, otherwise - // reuse the submessages already available. - auto itr = keyed_proto_inputs.find(raw_input->Name()); - if (itr == keyed_proto_inputs.end()) { - itr = keyed_proto_inputs - .insert(google::protobuf::MapPair< - std::string, tensorflow::TensorProto>( - raw_input->Name(), tensorflow::TensorProto())) - .first; - } - - // Set datatype - tensorflow::DataType tf_dtype = tensorflow::DT_INVALID; - GetTensorFlowDataType(raw_input->Datatype(), &tf_dtype); - itr->second.set_dtype(tf_dtype); - if (tf_dtype == tensorflow::DT_INVALID) { - return Error( - "failed to retrieve the TF datatype for " + raw_input->Name()); - } - - // Populate the shape - itr->second.mutable_tensor_shape()->Clear(); - for (const auto dim : raw_input->Shape()) { - itr->second.mutable_tensor_shape()->add_dim()->set_size(dim); - } - - raw_input->PrepareForRequest(); - // There is an extra copy into the buffer to collect all the input - // batches. This is a room for improvement for later. - bool end_of_input = false; - - // auto* raw_contents = itr->second.mutable_float_val()->mutable_data(); - size_t content_size; - raw_input->ByteSize(&content_size); - temp_buffer_.clear(); - temp_buffer_.reserve(content_size); - while (!end_of_input) { - const uint8_t* buf; - size_t buf_size; - raw_input->GetNext(&buf, &buf_size, &end_of_input); - if (buf != nullptr) { - temp_buffer_.append(reinterpret_cast(buf), buf_size); - } - } - ClearAllInputFields(&itr->second); - PopulateInputData(raw_input, &itr->second); - } - - // Remove extra tensor protos, if any. 
- std::set extra_inputs; - for (const auto& iter : keyed_proto_inputs) { - if (request_inputs.find(iter.first) == request_inputs.end()) { - extra_inputs.insert(iter.first); - } - } - for (const auto& extra_input : extra_inputs) { - keyed_proto_inputs.erase(extra_input); - } - - if (infer_request_.ByteSizeLong() > INT_MAX) { - size_t request_size = infer_request_.ByteSizeLong(); - infer_request_.Clear(); - return Error( - "Request has byte size " + std::to_string(request_size) + - " which exceed gRPC's byte size limit " + std::to_string(INT_MAX) + - "."); - } - - return Error::Success; -} - -Error -GrpcClient::ClearAllInputFields(tensorflow::TensorProto* input_tensor_proto) -{ - input_tensor_proto->mutable_half_val()->Clear(); - input_tensor_proto->mutable_float_val()->Clear(); - input_tensor_proto->mutable_double_val()->Clear(); - input_tensor_proto->mutable_int_val()->Clear(); - input_tensor_proto->mutable_string_val()->Clear(); - input_tensor_proto->mutable_int64_val()->Clear(); - input_tensor_proto->mutable_bool_val()->Clear(); - input_tensor_proto->mutable_uint32_val()->Clear(); - input_tensor_proto->mutable_uint64_val()->Clear(); - - return Error::Success; -} - -Error -GrpcClient::PopulateInputData( - TFServeInferInput* input, tensorflow::TensorProto* input_tensor_proto) -{ - if (input->Datatype() == "FP16") { - RETURN_IF_CB_ERROR(PopulateHalfVal(input_tensor_proto)); - } else if (input->Datatype() == "BF16") { - return Error( - "BF16 datatype not currently supported for populating input data."); - } else if (input->Datatype() == "FP32") { - RETURN_IF_CB_ERROR(PopulateFloatVal(input_tensor_proto)); - } else if (input->Datatype() == "FP64") { - RETURN_IF_CB_ERROR(PopulateDoubleVal(input_tensor_proto)); - } else if (input->Datatype() == "INT32") { - RETURN_IF_CB_ERROR(PopulateIntVal(input_tensor_proto)); - } else if (input->Datatype() == "INT16") { - RETURN_IF_CB_ERROR(PopulateIntVal(input_tensor_proto, 2)); - } else if (input->Datatype() == "UINT16") { - RETURN_IF_CB_ERROR(PopulateIntVal(input_tensor_proto, 2)); - } else if (input->Datatype() == "INT8") { - RETURN_IF_CB_ERROR(PopulateIntVal(input_tensor_proto, 1)); - } else if (input->Datatype() == "UINT8") { - RETURN_IF_CB_ERROR(PopulateIntVal(input_tensor_proto, 1)); - } else if (input->Datatype() == "BYTES") { - RETURN_IF_CB_ERROR(PopulateStrVal(input_tensor_proto)); - } else if (input->Datatype() == "INT64") { - RETURN_IF_CB_ERROR(PopulateInt64Val(input_tensor_proto)); - } else if (input->Datatype() == "BOOL") { - RETURN_IF_CB_ERROR(PopulateBoolVal(input_tensor_proto)); - } else if (input->Datatype() == "UINT32") { - RETURN_IF_CB_ERROR(PopulateUintVal(input_tensor_proto)); - } else if (input->Datatype() == "UINT64") { - RETURN_IF_CB_ERROR(PopulateUint64Val(input_tensor_proto)); - } else { - return Error("unsupported datatype for populating input data"); - } - - return Error::Success; -} - -Error -GrpcClient::PopulateHalfVal(tensorflow::TensorProto* input_tensor_proto) -{ - // Building FP16 one by one. Note that since protobuf has no int16 type, we'll - // have some pointless zero padding for each value here. 
- input_tensor_proto->mutable_half_val()->Reserve(2 * temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - int32_t elem; - memcpy(&elem, (temp_buffer_.c_str() + copied_byte_size), 2); - input_tensor_proto->add_half_val(elem); - copied_byte_size += 2; - } - - return Error::Success; -} - -Error -GrpcClient::PopulateFloatVal(tensorflow::TensorProto* input_tensor_proto) -{ - input_tensor_proto->mutable_float_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - input_tensor_proto->add_float_val( - *(float*)(temp_buffer_.c_str() + copied_byte_size)); - copied_byte_size += sizeof(float); - } - - return Error::Success; -} - -Error -GrpcClient::PopulateDoubleVal(tensorflow::TensorProto* input_tensor_proto) -{ - input_tensor_proto->mutable_double_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - input_tensor_proto->add_double_val( - *(double*)(temp_buffer_.c_str() + copied_byte_size)); - copied_byte_size += sizeof(double); - } - - return Error::Success; -} - -Error -GrpcClient::PopulateIntVal( - tensorflow::TensorProto* input_tensor_proto, size_t step_size) -{ - if (step_size == 4) { - input_tensor_proto->mutable_int_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - input_tensor_proto->add_int_val( - *(int*)(temp_buffer_.c_str() + copied_byte_size)); - copied_byte_size += sizeof(int); - } - } else { - // Note that since protobuf has no int16/int8 type, we'll - // have some pointless zero padding for each value here and - // need to build the tensor one element at a time - input_tensor_proto->mutable_int_val()->Reserve( - temp_buffer_.size() * (4 / step_size)); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - int32_t elem; - memcpy(&elem, (temp_buffer_.c_str() + copied_byte_size), step_size); - input_tensor_proto->add_int_val(elem); - copied_byte_size += step_size; - } - } - - return Error::Success; -} - -Error -GrpcClient::PopulateStrVal(tensorflow::TensorProto* input_tensor_proto) -{ - input_tensor_proto->mutable_string_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - int32_t string_length = *((int*)(temp_buffer_.c_str() + copied_byte_size)); - input_tensor_proto->add_string_val(std::string( - (temp_buffer_.c_str() + copied_byte_size + 4), string_length)); - copied_byte_size += (string_length + 4); - } - - return Error::Success; -} - -Error -GrpcClient::PopulateBoolVal(tensorflow::TensorProto* input_tensor_proto) -{ - input_tensor_proto->mutable_bool_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - input_tensor_proto->add_bool_val( - *(bool*)(temp_buffer_.c_str() + copied_byte_size)); - copied_byte_size += sizeof(bool); - } - - return Error::Success; -} - -Error -GrpcClient::PopulateInt64Val(tensorflow::TensorProto* input_tensor_proto) -{ - input_tensor_proto->mutable_int64_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - input_tensor_proto->add_bool_val( - *(int64_t*)(temp_buffer_.c_str() + copied_byte_size)); - copied_byte_size += sizeof(int64_t); - } - - return Error::Success; -} - -Error -GrpcClient::PopulateUintVal(tensorflow::TensorProto* input_tensor_proto) -{ - 
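The zero padding noted in the PopulateHalfVal/PopulateIntVal comments above comes from protobuf's lack of 16- and 8-bit scalar fields: each narrow element is copied into a 32-bit slot before being appended to the TensorProto's repeated int32 fields. A standalone sketch of that widening step, independent of the removed client; the helper name is illustrative and the explicit zero-initialization is an assumption of this sketch, not taken from the deleted code:

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Widen packed 1- or 2-byte elements (e.g. FP16 bit patterns, INT8/INT16
// values) into 32-bit slots, mirroring how the removed Populate* helpers
// filled the repeated int32 fields of tensorflow::TensorProto.
std::vector<int32_t> WidenTo32(const std::string& raw, size_t step_size)
{
  std::vector<int32_t> widened;
  widened.reserve(raw.size() / step_size);
  for (size_t offset = 0; offset + step_size <= raw.size();
       offset += step_size) {
    int32_t elem = 0;  // keep the unused upper bytes at zero
    std::memcpy(&elem, raw.data() + offset, step_size);
    widened.push_back(elem);
  }
  return widened;
}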
input_tensor_proto->mutable_uint32_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - input_tensor_proto->add_uint32_val( - *(uint32_t*)(temp_buffer_.c_str() + copied_byte_size)); - copied_byte_size += sizeof(uint32_t); - } - - return Error::Success; -} - -Error -GrpcClient::PopulateUint64Val(tensorflow::TensorProto* input_tensor_proto) -{ - input_tensor_proto->mutable_uint64_val()->Reserve(temp_buffer_.size()); - uint64_t copied_byte_size = 0; - while (copied_byte_size < temp_buffer_.size()) { - input_tensor_proto->add_uint64_val( - *(uint64_t*)(temp_buffer_.c_str() + copied_byte_size)); - copied_byte_size += sizeof(uint64_t); - } - - return Error::Success; -} - -GrpcClient::GrpcClient( - const std::string& url, bool verbose, bool use_ssl, - const SslOptions& ssl_options) - : InferenceServerClient(verbose), - stub_(tensorflow::serving::PredictionService::NewStub( - GetChannel(url, use_ssl, ssl_options))) -{ -} - -GrpcClient::~GrpcClient() -{ - exiting_ = true; - // Close complete queue and wait for the worker thread to return - async_request_completion_queue_.Shutdown(); - - // thread not joinable if AsyncInfer() is not called - // (it is default constructed thread before the first AsyncInfer() call) - if (worker_.joinable()) { - worker_.join(); - } - - bool has_next = true; - GrpcInferRequest* async_request; - bool ok; - do { - has_next = - async_request_completion_queue_.Next((void**)&async_request, &ok); - if (has_next && async_request != nullptr) { - delete async_request; - } - } while (has_next); -} - -//====================================================================== - -Error -InferResult::Create( - InferResult** infer_result, - std::shared_ptr response, - Error& request_status) -{ - *infer_result = - reinterpret_cast(new InferResult(response, request_status)); - return Error::Success; -} - -Error -InferResult::RequestStatus() const -{ - return request_status_; -} - -InferResult::InferResult( - std::shared_ptr response, - Error& request_status) - : response_(response), request_status_(request_status) -{ -} - -//====================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::tfserving diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_grpc_client.h b/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_grpc_client.h deleted file mode 100644 index bfa475b8c..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_grpc_client.h +++ /dev/null @@ -1,220 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../client_backend.h" -#include "common.h" -#include "tensorflow_serving/apis/prediction_service.grpc.pb.h" -#include "tfserve_infer_input.h" - -namespace tc = triton::client; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tfserving { - -struct SslOptions { - explicit SslOptions() {} - // File containing the PEM encoding of the server root certificates. - // If this parameter is empty, the default roots will be used. The - // default roots can be overridden using the - // GRPC_DEFAULT_SSL_ROOTS_FILE_PATH environment variable pointing - // to a file on the file system containing the roots. - std::string root_certificates; - // File containing the PEM encoding of the client's private key. - // This parameter can be empty if the client does not have a - // private key. - std::string private_key; - // File containing the PEM encoding of the client's certificate chain. - // This parameter can be empty if the client does not have a - // certificate chain. - std::string certificate_chain; -}; - -class InferResult; - -using TFServeOnCompleteFn = std::function; - -//============================================================================== -/// An GrpcClient object is used to perform any kind of communication with the -/// TFserving service using gRPC protocol. None of the functions are thread -/// safe. -/// -/// \code -/// std::unique_ptr client; -/// GrpcClient::Create(&client, "localhost:8500"); -/// ... -/// ... -/// \endcode -/// -class GrpcClient : public tc::InferenceServerClient { - public: - ~GrpcClient(); - - /// Create a client that can be used to communicate with the server. - /// \param client Returns a new InferenceServerGrpcClient object. - /// \param server_url The inference server name and port. - /// \param verbose If true generate verbose output when contacting - /// the inference server. - /// \param use_ssl If true use encrypted channel to the server. - /// \param ssl_options Specifies the files required for - /// SSL encryption and authorization. - /// \return Error object indicating success or failure. - static Error Create( - std::unique_ptr* client, const std::string& server_url, - bool verbose = false, bool use_ssl = false, - const SslOptions& ssl_options = SslOptions()); - - /// Contact the inference server and get the metadata of specified model. - /// \param model_metadata Returns model metadata as ModelMetadataResponse - /// message. - /// \param model_name The name of the model to get metadata. - /// \param model_version The version of the model to get metadata. - /// The default value is an empty string which means then the server will - /// choose a version based on the model and internal policy. 
- /// \param headers Optional map specifying additional HTTP headers to include - /// in the metadata of gRPC request. - /// \return Error object indicating success or failure of the request. - Error ModelMetadata( - tensorflow::serving::GetModelMetadataResponse* model_metadata, - const std::string& model_name, const std::string& model_version = "", - const Headers& headers = Headers()); - - /// Run synchronous inference on server. - /// \param result Returns the result of inference. - /// \param options The options for inference request. - /// \param inputs The vector of InferInput describing the model inputs. - /// \param outputs Optional vector of InferRequestedOutput describing how the - /// output must be returned. If not provided then all the outputs in the model - /// config will be returned as default settings. - /// \param headers Optional map specifying additional HTTP headers to include - /// in the metadata of gRPC request. - /// \param compression_algorithm The compression algorithm to be used - /// on the grpc requests. - /// \return Error object indicating success or failure of the - /// request. - Error Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs = - std::vector(), - const Headers& headers = Headers(), - const grpc_compression_algorithm compression_algorithm = - GRPC_COMPRESS_NONE); - - /// Run asynchronous inference on server. - /// Once the request is completed, the InferResult pointer will be passed to - /// the provided 'callback' function. Upon the invocation of callback - /// function, the ownership of InferResult object is transferred to the - /// function caller. It is then the caller's choice on either retrieving the - /// results inside the callback function or deferring it to a different thread - /// so that the client is unblocked. In order to prevent memory leak, user - /// must ensure this object gets deleted. - /// \param callback The callback function to be invoked on request completion. - /// \param options The options for inference request. - /// \param inputs The vector of InferInput describing the model inputs. - /// \param outputs Optional vector of InferRequestedOutput describing how the - /// output must be returned. If not provided then all the outputs in the model - /// config will be returned as default settings. - /// \param headers Optional map specifying additional HTTP headers to include - /// in the metadata of gRPC request. - /// \param compression_algorithm The compression algorithm to be used - /// on the grpc requests. - /// \return Error object indicating success or failure of the request. 
- Error AsyncInfer( - TFServeOnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs = - std::vector(), - const Headers& headers = Headers(), - const grpc_compression_algorithm compression_algorithm = - GRPC_COMPRESS_NONE); - - private: - GrpcClient( - const std::string& url, bool verbose, bool use_ssl, - const SslOptions& ssl_options); - Error PreRunProcessing( - const InferOptions& options, const std::vector& inputs, - const std::vector& outputs); - void AsyncTransfer(); - Error ClearAllInputFields(tensorflow::TensorProto* input_tensor_proto); - Error PopulateInputData( - TFServeInferInput* input, tensorflow::TensorProto* input_tensor_proto); - Error PopulateHalfVal(tensorflow::TensorProto* input_tensor_proto); - Error PopulateFloatVal(tensorflow::TensorProto* input_tensor_proto); - Error PopulateDoubleVal(tensorflow::TensorProto* input_tensor_proto); - Error PopulateIntVal( - tensorflow::TensorProto* input_tensor_proto, size_t step_size = 4); - Error PopulateStrVal(tensorflow::TensorProto* input_tensor_proto); - Error PopulateBoolVal(tensorflow::TensorProto* input_tensor_proto); - Error PopulateInt64Val(tensorflow::TensorProto* input_tensor_proto); - Error PopulateUintVal(tensorflow::TensorProto* input_tensor_proto); - Error PopulateUint64Val(tensorflow::TensorProto* input_tensor_proto); - - // The producer-consumer queue used to communicate asynchronously with - // the GRPC runtime. - grpc::CompletionQueue async_request_completion_queue_; - - bool enable_stream_stats_; - std::mutex stream_mutex_; - - // GRPC end point. - std::unique_ptr stub_; - // request for GRPC call, one request object can be used for multiple calls - // since it can be overwritten as soon as the GRPC send finishes. - tensorflow::serving::PredictRequest infer_request_; - // A temporary buffer to hold serialized data - std::string temp_buffer_; -}; - -//====================================================================== - -class InferResult { - public: - static Error Create( - InferResult** infer_result, - std::shared_ptr response, - Error& request_status); - - - Error RequestStatus() const; - Error Id(std::string* id) const; - std::string DebugString() const { return response_->DebugString(); } - - private: - InferResult( - std::shared_ptr response, - Error& request_status); - - std::shared_ptr response_; - Error request_status_; -}; - -//====================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::tfserving diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_infer_input.cc b/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_infer_input.cc deleted file mode 100644 index 60edf87e7..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_infer_input.cc +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "tfserve_infer_input.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tfserving { - -Error -TFServeInferInput::Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype) -{ - TFServeInferInput* local_infer_input = - new TFServeInferInput(name, dims, datatype); - - *infer_input = local_infer_input; - return Error::Success; -} - -Error -TFServeInferInput::SetShape(const std::vector& shape) -{ - shape_ = shape; - return Error::Success; -} - -Error -TFServeInferInput::Reset() -{ - bufs_.clear(); - buf_byte_sizes_.clear(); - bufs_idx_ = 0; - byte_size_ = 0; - return Error::Success; -} - -Error -TFServeInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) -{ - byte_size_ += input_byte_size; - - bufs_.push_back(input); - buf_byte_sizes_.push_back(input_byte_size); - - return Error::Success; -} - -Error -TFServeInferInput::ByteSize(size_t* byte_size) const -{ - *byte_size = byte_size_; - return Error::Success; -} - -Error -TFServeInferInput::PrepareForRequest() -{ - // Reset position so request sends entire input. - bufs_idx_ = 0; - buf_pos_ = 0; - return Error::Success; -} - -Error -TFServeInferInput::GetNext( - const uint8_t** buf, size_t* input_bytes, bool* end_of_input) -{ - if (bufs_idx_ < bufs_.size()) { - *buf = bufs_[bufs_idx_]; - *input_bytes = buf_byte_sizes_[bufs_idx_]; - bufs_idx_++; - } else { - *buf = nullptr; - *input_bytes = 0; - } - *end_of_input = (bufs_idx_ >= bufs_.size()); - - return Error::Success; -} - -TFServeInferInput::TFServeInferInput( - const std::string& name, const std::vector& dims, - const std::string& datatype) - : InferInput(BackendKind::TENSORFLOW_SERVING, name, datatype), shape_(dims) -{ -} - -}}}} // namespace triton::perfanalyzer::clientbackend::tfserving diff --git a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_infer_input.h b/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_infer_input.h deleted file mode 100644 index ec1a35dd9..000000000 --- a/src/c++/perf_analyzer/client_backend/tensorflow_serving/tfserve_infer_input.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
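The PrepareForRequest()/GetNext() pair implemented above defines how callers iterate the buffers registered with AppendRaw(): rewind first, then pull each region until end_of_input is reported. A minimal consumer sketch mirroring the loop the deleted PreRunProcessing used; it assumes the removed tfserve_infer_input.h (and its namespace) and the function name is illustrative:

#include <cstdint>
#include <string>

// Drain a TFServeInferInput into one contiguous buffer.
std::string GatherInput(TFServeInferInput& input)
{
  std::string assembled;
  input.PrepareForRequest();
  bool end_of_input = false;
  while (!end_of_input) {
    const uint8_t* buf = nullptr;
    size_t buf_size = 0;
    input.GetNext(&buf, &buf_size, &end_of_input);
    if (buf != nullptr) {
      assembled.append(reinterpret_cast<const char*>(buf), buf_size);
    }
  }
  return assembled;
}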
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../../perf_utils.h" -#include "../client_backend.h" - - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tfserving { - -//============================================================== -/// TFServeInferInput instance holds the information regarding -/// model input tensors and their corresponding generated data. -/// -class TFServeInferInput : public InferInput { - public: - static Error Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype); - /// See InferInput::Shape() - const std::vector& Shape() const override { return shape_; } - /// See InferInput::SetShape() - Error SetShape(const std::vector& shape) override; - /// See InferInput::Reset() - Error Reset() override; - /// See InferInput::AppendRaw() - Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; - /// Gets the size of data added into this input in bytes. - /// \param byte_size The size of data added in bytes. - /// \return Error object indicating success or failure. - Error ByteSize(size_t* byte_size) const; - /// Resets the heads to start providing data from the beginning. - Error PrepareForRequest(); - /// Get the next chunk of data if available. - Error GetNext(const uint8_t** buf, size_t* input_bytes, bool* end_of_input); - - private: - explicit TFServeInferInput( - const std::string& name, const std::vector& dims, - const std::string& datatype); - - std::vector shape_; - size_t byte_size_{0}; - - size_t bufs_idx_, buf_pos_; - std::vector bufs_; - std::vector buf_byte_sizes_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::tfserving diff --git a/src/c++/perf_analyzer/client_backend/torchserve/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/torchserve/CMakeLists.txt deleted file mode 100644 index 19e4c6245..000000000 --- a/src/c++/perf_analyzer/client_backend/torchserve/CMakeLists.txt +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -cmake_minimum_required (VERSION 3.18) - -set( - TS_CLIENT_BACKEND_SRCS - torchserve_client_backend.cc - torchserve_infer_input.cc - torchserve_http_client.cc -) - -set( - TS_CLIENT_BACKEND_HDRS - torchserve_client_backend.h - torchserve_infer_input.h - torchserve_http_client.h -) - -add_library( - ts-client-backend-library EXCLUDE_FROM_ALL OBJECT - ${TS_CLIENT_BACKEND_SRCS} - ${TS_CLIENT_BACKEND_HDRS} -) - -target_link_libraries( - ts-client-backend-library - PUBLIC CURL::libcurl - PUBLIC httpclient_static -) - -if(${TRITON_ENABLE_GPU}) - target_include_directories(ts-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS}) - target_link_libraries(ts-client-backend-library PRIVATE ${CUDA_LIBRARIES}) -endif() # TRITON_ENABLE_GPU diff --git a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_client_backend.cc b/src/c++/perf_analyzer/client_backend/torchserve/torchserve_client_backend.cc deleted file mode 100644 index 76e62c6c0..000000000 --- a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_client_backend.cc +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "torchserve_client_backend.h" - -#include "json_utils.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace torchserve { - - -//============================================================================== - -Error -TorchServeClientBackend::Create( - const std::string& url, const ProtocolType protocol, - std::shared_ptr http_headers, const bool verbose, - std::unique_ptr* client_backend) -{ - if (protocol == ProtocolType::GRPC) { - return Error( - "perf_analyzer does not support gRPC protocol with TorchServe"); - } - std::unique_ptr torchserve_client_backend( - new TorchServeClientBackend(http_headers)); - RETURN_IF_CB_ERROR(ts::HttpClient::Create( - &(torchserve_client_backend->http_client_), url, verbose)); - *client_backend = std::move(torchserve_client_backend); - return Error::Success; -} - -Error -TorchServeClientBackend::Infer( - cb::InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - ts::InferResult* torchserve_result; - RETURN_IF_CB_ERROR(http_client_->Infer( - &torchserve_result, options, inputs, outputs, *http_headers_)); - *result = new TorchServeInferResult(torchserve_result); - return Error::Success; -} - -Error -TorchServeClientBackend::ClientInferStat(InferStat* infer_stat) -{ - // Reusing the common library utilities to collect and report the - // client side statistics. 
- tc::InferStat client_infer_stat; - RETURN_IF_TRITON_ERROR(http_client_->ClientInferStat(&client_infer_stat)); - ParseInferStat(client_infer_stat, infer_stat); - return Error::Success; -} - -void -TorchServeClientBackend::ParseInferStat( - const tc::InferStat& torchserve_infer_stat, InferStat* infer_stat) -{ - infer_stat->completed_request_count = - torchserve_infer_stat.completed_request_count; - infer_stat->cumulative_total_request_time_ns = - torchserve_infer_stat.cumulative_total_request_time_ns; - infer_stat->cumulative_send_time_ns = - torchserve_infer_stat.cumulative_send_time_ns; - infer_stat->cumulative_receive_time_ns = - torchserve_infer_stat.cumulative_receive_time_ns; -} - -//============================================================================== - -TorchServeInferResult::TorchServeInferResult(ts::InferResult* result) -{ - result_.reset(result); -} - -Error -TorchServeInferResult::Id(std::string* id) const -{ - id->clear(); - return Error::Success; -} - -Error -TorchServeInferResult::RequestStatus() const -{ - RETURN_IF_CB_ERROR(result_->RequestStatus()); - return Error::Success; -} - -Error -TorchServeInferResult::RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const -{ - return Error( - "Output retrieval is not currently supported for TorchServe client " - "backend"); -} - -//============================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::torchserve diff --git a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_client_backend.h b/src/c++/perf_analyzer/client_backend/torchserve/torchserve_client_backend.h deleted file mode 100644 index 25566256e..000000000 --- a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_client_backend.h +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include - -#include "../../perf_utils.h" -#include "../client_backend.h" -#include "torchserve_http_client.h" - -#define RETURN_IF_TRITON_ERROR(S) \ - do { \ - const tc::Error& status__ = (S); \ - if (!status__.IsOk()) { \ - return Error(status__.Message()); \ - } \ - } while (false) - -namespace tc = triton::client; -namespace cb = triton::perfanalyzer::clientbackend; -namespace ts = triton::perfanalyzer::clientbackend::torchserve; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace torchserve { - - -//============================================================================== -/// TorchServeClientBackend is used to generate load on the Torchserve instance -/// -class TorchServeClientBackend : public ClientBackend { - public: - /// Create a torchserve client backend which can be used to interact with the - /// server. - /// \param url The inference server url and port. - /// \param protocol The protocol type used. - /// \param http_headers Map of HTTP headers. The map key/value indicates - /// the header name/value. - /// \param verbose Enables the verbose mode. - /// \param client_backend Returns a new TorchServeClientBackend - /// object. - /// \return Error object indicating success or failure. - static Error Create( - const std::string& url, const ProtocolType protocol, - std::shared_ptr http_headers, const bool verbose, - std::unique_ptr* client_backend); - - /// See ClientBackend::Infer() - Error Infer( - cb::InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::ClientInferStat() - Error ClientInferStat(InferStat* infer_stat) override; - - private: - TorchServeClientBackend(std::shared_ptr http_headers) - : ClientBackend(BackendKind::TORCHSERVE), http_headers_(http_headers) - { - } - - void ParseInferStat( - const tc::InferStat& torchserve_infer_stat, InferStat* infer_stat); - - std::unique_ptr http_client_; - std::shared_ptr http_headers_; -}; - -//============================================================== -/// TorchServeInferResult is a wrapper around InferResult object of -/// torchserve InferResult object. -/// -class TorchServeInferResult : public cb::InferResult { - public: - explicit TorchServeInferResult(ts::InferResult* result); - /// See InferResult::Id() - Error Id(std::string* id) const override; - /// See InferResult::RequestStatus() - Error RequestStatus() const override; - /// See InferResult::RawData() - Error RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const override; - - private: - std::unique_ptr result_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::torchserve diff --git a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_http_client.cc b/src/c++/perf_analyzer/client_backend/torchserve/torchserve_http_client.cc deleted file mode 100644 index c835ab109..000000000 --- a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_http_client.cc +++ /dev/null @@ -1,409 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "torchserve_http_client.h" - -#include -#include - -#include "torchserve_client_backend.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace torchserve { - -namespace { - -constexpr char kContentLengthHTTPHeader[] = "Content-Length"; - -//============================================================================== - -// Global initialization for libcurl. Libcurl requires global -// initialization before any other threads are created and before any -// curl methods are used. The curl_global static object is used to -// perform this initialization. 
-class CurlGlobal { - public: - CurlGlobal(); - ~CurlGlobal(); - - const Error& Status() const { return err_; } - - private: - Error err_; -}; - -CurlGlobal::CurlGlobal() : err_(Error::Success) -{ - if (curl_global_init(CURL_GLOBAL_ALL) != 0) { - err_ = Error("global initialization failed"); - } -} - -CurlGlobal::~CurlGlobal() -{ - curl_global_cleanup(); -} - -static CurlGlobal curl_global; - - -} // namespace - -//============================================================================== - -HttpInferRequest::HttpInferRequest() - : header_list_(nullptr), - file_ptr_(std::unique_ptr(nullptr, Deleter())) -{ -} - -HttpInferRequest::~HttpInferRequest() -{ - if (header_list_ != nullptr) { - curl_slist_free_all(static_cast(header_list_)); - header_list_ = nullptr; - } -} - -Error -HttpInferRequest::InitializeRequest() -{ - http_code_ = 400; - // Prepare buffer to record the response - infer_response_buffer_.reset(new std::string()); - return Error::Success; -} - -Error -HttpInferRequest::OpenFileData(std::string& file_path) -{ - FILE* pFile = fopen(file_path.c_str(), "rb"); - if (pFile == nullptr) { - return Error("Failed to open the specified file `" + file_path + "`"); - } - file_ptr_.reset(pFile); - return Error::Success; -} - -long -HttpInferRequest::FileSize() -{ - long size; - fseek(file_ptr_.get(), 0, SEEK_END); - size = ftell(file_ptr_.get()); - rewind(file_ptr_.get()); - return size; -} - -Error -HttpInferRequest::CloseFileData() -{ - file_ptr_.reset(nullptr); - return Error::Success; -} - - -//============================================================================== - -Error -HttpClient::Create( - std::unique_ptr* client, const std::string& server_url, - bool verbose) -{ - client->reset(new HttpClient(server_url, verbose)); - return Error::Success; -} - -Error -HttpClient::Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs, - const Headers& headers) -{ - Error err; - - std::string request_uri(url_ + "/predictions/" + options.model_name_); - if (!options.model_version_.empty()) { - request_uri += "/" + options.model_version_; - } - - std::shared_ptr sync_request(new HttpInferRequest()); - - sync_request->Timer().Reset(); - sync_request->Timer().CaptureTimestamp( - tc::RequestTimers::Kind::REQUEST_START); - - if (!curl_global.Status().IsOk()) { - return curl_global.Status(); - } - - err = PreRunProcessing( - easy_handle_, request_uri, options, inputs, outputs, headers, - sync_request); - if (!err.IsOk()) { - return err; - } - - sync_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::SEND_START); - - // During this call SEND_END (except in above case), RECV_START, and - // RECV_END will be set. 
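As the comment above notes, libcurl's global state must be initialized exactly once, before any other threads exist, which is why the removed backend wrapped curl_global_init() in the static CurlGlobal object. The same constraint in its simplest form, outside that wrapper; the surrounding program structure is illustrative:

#include <curl/curl.h>

// Initialize libcurl's global state once, before any worker threads or easy
// handles are created, and pair it with curl_global_cleanup() at shutdown.
int main()
{
  if (curl_global_init(CURL_GLOBAL_ALL) != 0) {
    return 1;  // global initialization failed
  }
  // ... create threads, CURL easy handles, perform transfers ...
  curl_global_cleanup();
  return 0;
}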
- auto curl_status = curl_easy_perform(easy_handle_); - if (curl_status != CURLE_OK) { - sync_request->http_code_ = 400; - } else { - curl_easy_getinfo( - easy_handle_, CURLINFO_RESPONSE_CODE, &sync_request->http_code_); - } - - sync_request->CloseFileData(); - curl_mime_free(mime_handle_); - - InferResult::Create(result, sync_request); - - sync_request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::REQUEST_END); - - tc::Error nic_err = UpdateInferStat(sync_request->Timer()); - if (!nic_err.IsOk()) { - std::cerr << "Failed to update context stat: " << nic_err << std::endl; - } - - err = (*result)->RequestStatus(); - - return err; -} - -size_t -HttpClient::ReadCallback(char* buffer, size_t size, size_t nitems, void* userp) -{ - size_t retcode = - fread(buffer, size, nitems, ((HttpInferRequest*)userp)->FilePtr()); - if (retcode == 0) { - ((HttpInferRequest*)userp) - ->Timer() - .CaptureTimestamp(tc::RequestTimers::Kind::SEND_END); - } - return retcode; -} - -int -HttpClient::SeekCallback(void* userp, curl_off_t offset, int origin) -{ - if (fseek(((HttpInferRequest*)userp)->FilePtr(), offset, origin) == 0) - return CURL_SEEKFUNC_OK; - else - return CURL_SEEKFUNC_FAIL; -} - -size_t -HttpClient::InferResponseHeaderHandler( - void* contents, size_t size, size_t nmemb, void* userp) -{ - HttpInferRequest* request = reinterpret_cast(userp); - - char* buf = reinterpret_cast(contents); - size_t byte_size = size * nmemb; - - size_t idx = strlen(kContentLengthHTTPHeader); - if ((idx < byte_size) && !strncasecmp(buf, kContentLengthHTTPHeader, idx)) { - while ((idx < byte_size) && (buf[idx] != ':')) { - ++idx; - } - - if (idx < byte_size) { - std::string hdr(buf + idx + 1, byte_size - idx - 1); - request->infer_response_buffer_->reserve(std::stoi(hdr)); - } - } - - return byte_size; -} - -size_t -HttpClient::InferResponseHandler( - void* contents, size_t size, size_t nmemb, void* userp) -{ - HttpInferRequest* request = reinterpret_cast(userp); - - if (request->Timer().Timestamp(tc::RequestTimers::Kind::RECV_START) == 0) { - request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::RECV_START); - } - - char* buf = reinterpret_cast(contents); - size_t result_bytes = size * nmemb; - request->infer_response_buffer_->append(buf, result_bytes); - - // InferResponseHandler may be called multiple times so we overwrite - // RECV_END so that we always have the time of the last. - request->Timer().CaptureTimestamp(tc::RequestTimers::Kind::RECV_END); - - return result_bytes; -} - -Error -HttpClient::PreRunProcessing( - void* vcurl, std::string& request_uri, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs, - const Headers& headers, std::shared_ptr& http_request) -{ - CURL* curl = reinterpret_cast(vcurl); - - // Prepare the request object to provide the data for inference. 
- Error err = http_request->InitializeRequest(); - if (!err.IsOk()) { - return err; - } - - std::vector input_filepaths; - - curl_easy_setopt(curl, CURLOPT_URL, request_uri.c_str()); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0"); - curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L); - - if (verbose_) { - curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); - } - - const long buffer_byte_size = 16 * 1024 * 1024; - curl_easy_setopt(curl, CURLOPT_UPLOAD_BUFFERSIZE, buffer_byte_size); - curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, buffer_byte_size); - - // request data provided by InferRequestProvider() - mime_handle_ = curl_mime_init(easy_handle_); - // Add the buffers holding input tensor data - for (const auto input : inputs) { - TorchServeInferInput* this_input = - dynamic_cast(input); - this_input->PrepareForRequest(); - bool end_of_input = false; - while (!end_of_input) { - const uint8_t* buf; - size_t buf_size; - this_input->GetNext(&buf, &buf_size, &end_of_input); - std::string file_path( - reinterpret_cast(buf) + 4, buf_size - 4); - if (buf != nullptr) { - Error err = http_request->OpenFileData(file_path); - if (!err.IsOk()) { - return err; - } - if (verbose_) { - input_filepaths.push_back(file_path); - } - } - } - } - - long file_size = http_request->FileSize(); - curl_mimepart* part = curl_mime_addpart((curl_mime*)mime_handle_); - curl_mime_data_cb( - part, file_size, ReadCallback, SeekCallback, NULL, http_request.get()); - curl_mime_name(part, "data"); - - curl_easy_setopt(easy_handle_, CURLOPT_MIMEPOST, (curl_mime*)mime_handle_); - - // response headers handled by InferResponseHeaderHandler() - curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, InferResponseHeaderHandler); - curl_easy_setopt(curl, CURLOPT_HEADERDATA, http_request.get()); - - // response data handled by InferResponseHandler() - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, InferResponseHandler); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, http_request.get()); - - struct curl_slist* list = nullptr; - for (const auto& pr : headers) { - std::string hdr = pr.first + ": " + pr.second; - list = curl_slist_append(list, hdr.c_str()); - } - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); - - // The list will be freed when the request is destructed - http_request->header_list_ = list; - - if (verbose_) { - std::cout << "inference request : ["; - bool first = true; - for (const auto& fn : input_filepaths) { - if (first) { - first = false; - } else { - std::cout << ","; - } - std::cout << "\"" << fn << "\""; - } - std::cout << "]" << std::endl; - } - - return Error::Success; -} - -HttpClient::HttpClient(const std::string& url, bool verbose) - : InferenceServerClient(verbose), url_(url), - easy_handle_(reinterpret_cast(curl_easy_init())) -{ -} - -HttpClient::~HttpClient() -{ - exiting_ = true; - - if (easy_handle_ != nullptr) { - curl_easy_cleanup(reinterpret_cast(easy_handle_)); - } -} - -//====================================================================== - -Error -InferResult::Create( - InferResult** infer_result, std::shared_ptr infer_request) -{ - *infer_result = - reinterpret_cast(new InferResult(infer_request)); - return Error::Success; -} - -Error -InferResult::RequestStatus() const -{ - return status_; -} - -InferResult::InferResult(std::shared_ptr infer_request) - : infer_request_(infer_request) -{ - if (infer_request->http_code_ != 200) { - status_ = Error( - "inference failed with error code " + - std::to_string(infer_request->http_code_)); - } -} - 
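The input loop above recovers the file path from a buffer that stores a 4-byte length followed by the raw characters, the same framing the TF Serving PopulateStrVal helper used for BYTES data. A standalone sketch of that framing, assuming native byte order as the original pointer casts did; the helper names are illustrative:

#include <cstdint>
#include <cstring>
#include <string>

// Encode and decode the 4-byte-length-prefixed string framing used by the
// removed backends when passing BYTES data (native byte order).
std::string EncodeLengthPrefixed(const std::string& payload)
{
  const uint32_t length = static_cast<uint32_t>(payload.size());
  std::string framed(reinterpret_cast<const char*>(&length), sizeof(length));
  framed += payload;
  return framed;
}

std::string DecodeLengthPrefixed(const std::string& framed)
{
  uint32_t length = 0;
  std::memcpy(&length, framed.data(), sizeof(length));
  return framed.substr(sizeof(length), length);
}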
-//====================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::torchserve diff --git a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_http_client.h b/src/c++/perf_analyzer/client_backend/torchserve/torchserve_http_client.h deleted file mode 100644 index ede9cdfd5..000000000 --- a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_http_client.h +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include - -#include "../client_backend.h" -#include "common.h" -#include "torchserve_infer_input.h" - - -namespace tc = triton::client; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace torchserve { - -class InferResult; -class HttpInferRequest; - -using TorchServeOnCompleteFn = std::function; - -//============================================================================== -/// An HttpClient object is used to perform any kind of communication with the -/// torchserve service using libcurl. None of the functions are thread -/// safe. -/// -/// \code -/// std::unique_ptr client; -/// HttpClient::Create(&client, "localhost:8080"); -/// ... -/// ... -/// \endcode -/// -class HttpClient : public tc::InferenceServerClient { - public: - ~HttpClient(); - - /// Create a client that can be used to communicate with the server. - /// \param client Returns a new InferenceServerHttpClient object. - /// \param server_url The inference server name and port. - /// \param verbose If true generate verbose output when contacting - /// the inference server. - /// \return Error object indicating success or failure. - static Error Create( - std::unique_ptr* client, const std::string& server_url, - const bool verbose); - - /// Run synchronous inference on server. - /// \param result Returns the result of inference. - /// \param options The options for inference request. - /// \param inputs The vector of InferInput describing the model inputs. 
- /// \param outputs Optional vector of InferRequestedOutput describing how the - /// output must be returned. If not provided then all the outputs in the model - /// config will be returned as default settings. - /// \param headers Optional map specifying additional HTTP headers to include - /// in the metadata of gRPC request. - /// \return Error object indicating success or failure of the - /// request. - Error Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs = - std::vector(), - const Headers& headers = Headers()); - - private: - HttpClient(const std::string& url, bool verbose); - Error PreRunProcessing( - void* curl, std::string& request_uri, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs, - const Headers& headers, std::shared_ptr& request); - - static size_t ReadCallback( - char* buffer, size_t size, size_t nitems, void* userp); - static int SeekCallback(void* userp, curl_off_t offset, int origin); - static size_t InferResponseHeaderHandler( - void* contents, size_t size, size_t nmemb, void* userp); - static size_t InferResponseHandler( - void* contents, size_t size, size_t nmemb, void* userp); - - // The server url - const std::string url_; - // curl easy handle shared for all synchronous requests. - void* easy_handle_; - // The handle to interact with mime API. - curl_mime* mime_handle_; -}; - -//====================================================================== - -class HttpInferRequest { - public: - struct Deleter { - void operator()(FILE* file) - { - if (file != nullptr) { - fclose(file); - } - } - }; - - HttpInferRequest(); - ~HttpInferRequest(); - Error InitializeRequest(); - Error OpenFileData(std::string& file_path); - long FileSize(); - Error CloseFileData(); - tc::RequestTimers& Timer() { return timer_; } - std::string& DebugString() { return *infer_response_buffer_; } - FILE* FilePtr() { return file_ptr_.get(); } - friend HttpClient; - friend InferResult; - - private: - // Pointer to the list of the HTTP request header, keep it such that it will - // be valid during the transfer and can be freed once transfer is completed. - struct curl_slist* header_list_; - std::unique_ptr file_ptr_; - // HTTP response code for the inference request - long http_code_; - // Buffer that accumulates the response body. - std::unique_ptr infer_response_buffer_; - // The timers for infer request. - tc::RequestTimers timer_; -}; - -//====================================================================== - -class InferResult { - public: - static Error Create( - InferResult** infer_result, - std::shared_ptr infer_request); - Error RequestStatus() const; - Error Id(std::string* id) const; - std::string DebugString() const { return infer_request_->DebugString(); } - - private: - InferResult(std::shared_ptr infer_request); - - // The status of the inference - Error status_; - // The pointer to the HttpInferRequest object - std::shared_ptr infer_request_; -}; - -//====================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::torchserve diff --git a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_infer_input.cc b/src/c++/perf_analyzer/client_backend/torchserve/torchserve_infer_input.cc deleted file mode 100644 index 22eb1ea97..000000000 --- a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_infer_input.cc +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION. 
All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "torchserve_infer_input.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace torchserve { - - -Error -TorchServeInferInput::Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype) -{ - TorchServeInferInput* local_infer_input = - new TorchServeInferInput(name, dims, datatype); - *infer_input = local_infer_input; - return Error::Success; -} - -Error -TorchServeInferInput::SetShape(const std::vector& shape) -{ - shape_ = shape; - return Error::Success; -} - -Error -TorchServeInferInput::Reset() -{ - bufs_.clear(); - buf_byte_sizes_.clear(); - bufs_idx_ = 0; - byte_size_ = 0; - return Error::Success; -} - -Error -TorchServeInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) -{ - byte_size_ += input_byte_size; - bufs_.push_back(input); - buf_byte_sizes_.push_back(input_byte_size); - return Error::Success; -} - -Error -TorchServeInferInput::ByteSize(size_t* byte_size) const -{ - *byte_size = byte_size_; - return Error::Success; -} - -Error -TorchServeInferInput::PrepareForRequest() -{ - // Reset position so request sends entire input. 
- bufs_idx_ = 0; - buf_pos_ = 0; - return Error::Success; -} - -Error -TorchServeInferInput::GetNext( - const uint8_t** buf, size_t* input_bytes, bool* end_of_input) -{ - if (bufs_idx_ < bufs_.size()) { - *buf = bufs_[bufs_idx_]; - *input_bytes = buf_byte_sizes_[bufs_idx_]; - bufs_idx_++; - } else { - *buf = nullptr; - *input_bytes = 0; - } - *end_of_input = (bufs_idx_ >= bufs_.size()); - return Error::Success; -} - -TorchServeInferInput::TorchServeInferInput( - const std::string& name, const std::vector& dims, - const std::string& datatype) - : InferInput(BackendKind::TORCHSERVE, name, datatype), shape_(dims) -{ -} - -}}}} // namespace triton::perfanalyzer::clientbackend::torchserve diff --git a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_infer_input.h b/src/c++/perf_analyzer/client_backend/torchserve/torchserve_infer_input.h deleted file mode 100644 index cc629cd1d..000000000 --- a/src/c++/perf_analyzer/client_backend/torchserve/torchserve_infer_input.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../../perf_utils.h" -#include "../client_backend.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace torchserve { - - -//============================================================== -/// TorchServeInferInput instance holds the information regarding -/// model input tensor. In this case the content held will be -/// the path to the file holding data. 
-/// -class TorchServeInferInput : public InferInput { - public: - static Error Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype); - /// See InferInput::Shape() - const std::vector& Shape() const override { return shape_; } - /// See InferInput::SetShape() - Error SetShape(const std::vector& shape) override; - /// See InferInput::Reset() - Error Reset() override; - /// See InferInput::AppendRaw() - Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; - /// Gets the size of data added into this input in bytes. - /// \param byte_size The size of data added in bytes. - /// \return Error object indicating success or failure. - Error ByteSize(size_t* byte_size) const; - /// Resets the heads to start providing data from the beginning. - Error PrepareForRequest(); - /// Get the next chunk of data if available. - Error GetNext(const uint8_t** buf, size_t* input_bytes, bool* end_of_input); - - private: - explicit TorchServeInferInput( - const std::string& name, const std::vector& dims, - const std::string& datatype); - - std::vector shape_; - size_t byte_size_; - size_t bufs_idx_, buf_pos_; - std::vector bufs_; - std::vector buf_byte_sizes_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::torchserve diff --git a/src/c++/perf_analyzer/client_backend/triton/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/triton/CMakeLists.txt deleted file mode 100644 index 203a8e350..000000000 --- a/src/c++/perf_analyzer/client_backend/triton/CMakeLists.txt +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -cmake_minimum_required (VERSION 3.18) - -set( - TRITON_CLIENT_BACKEND_SRCS - triton_client_backend.cc -) - -set( - TRITON_CLIENT_BACKEND_HDRS - triton_client_backend.h -) - -add_library( - triton-client-backend-library EXCLUDE_FROM_ALL OBJECT - ${TRITON_CLIENT_BACKEND_SRCS} - ${TRITON_CLIENT_BACKEND_HDRS} -) - -target_link_libraries( - triton-client-backend-library - PUBLIC grpcclient_static - PUBLIC httpclient_static - PRIVATE CURL::libcurl -) - -target_include_directories( - triton-client-backend-library - PRIVATE CURL::libcurl -) - -if(${TRITON_ENABLE_GPU}) - target_link_libraries( - triton-client-backend-library - PRIVATE CUDA::cudart - ) -endif() # TRITON_ENABLE_GPU diff --git a/src/c++/perf_analyzer/client_backend/triton/test_triton_client_backend.cc b/src/c++/perf_analyzer/client_backend/triton/test_triton_client_backend.cc deleted file mode 100644 index c32ad17be..000000000 --- a/src/c++/perf_analyzer/client_backend/triton/test_triton_client_backend.cc +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include -#include -#include - -#include "../../doctest.h" -#include "triton_client_backend.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritonremote { - -class TestTritonClientBackend : public TritonClientBackend { - public: - template - void ParseAndStoreMetric( - const std::string& metrics_endpoint_text, const std::string metric_id, - std::map& metric_per_gpu) - { - TritonClientBackend::ParseAndStoreMetric( - metrics_endpoint_text, metric_id, metric_per_gpu); - } -}; - -TEST_CASE("testing the ParseAndStoreMetric function") -{ - TestTritonClientBackend ttcb{}; - - SUBCASE("nv_gpu_utilization metric") - { - const std::string metrics_endpoint_text{R"( -# HELP nv_gpu_utilization GPU utilization rate [0.0 - 1.0) -# TYPE nv_gpu_utilization gauge -nv_gpu_utilization{gpu_uuid="GPU-00000000-0000-0000-0000-000000000000"} 0.41 -nv_gpu_utilization{gpu_uuid="GPU-00000000-0000-0000-0000-000000000001"} 0.77 - )"}; - const std::string metric_id{"nv_gpu_utilization"}; - std::map gpu_utilization_per_gpu{}; - - ttcb.ParseAndStoreMetric( - metrics_endpoint_text, metric_id, gpu_utilization_per_gpu); - CHECK(gpu_utilization_per_gpu.size() == 2); - CHECK( - gpu_utilization_per_gpu["GPU-00000000-0000-0000-0000-000000000000"] == - doctest::Approx(0.41)); - CHECK( - gpu_utilization_per_gpu["GPU-00000000-0000-0000-0000-000000000001"] == - doctest::Approx(0.77)); - } - - SUBCASE("nv_gpu_power_usage metric") - { - const std::string metrics_endpoint_text{R"( -# HELP nv_gpu_power_usage GPU power usage in watts -# TYPE nv_gpu_power_usage gauge -nv_gpu_power_usage{gpu_uuid="GPU-00000000-0000-0000-0000-000000000000"} 81.619 -nv_gpu_power_usage{gpu_uuid="GPU-00000000-0000-0000-0000-000000000001"} 99.217 - )"}; - const std::string metric_id{"nv_gpu_power_usage"}; - std::map gpu_power_usage_per_gpu{}; - - ttcb.ParseAndStoreMetric( - metrics_endpoint_text, metric_id, gpu_power_usage_per_gpu); - CHECK(gpu_power_usage_per_gpu.size() == 2); - CHECK( - gpu_power_usage_per_gpu["GPU-00000000-0000-0000-0000-000000000000"] == - doctest::Approx(81.619)); - CHECK( - gpu_power_usage_per_gpu["GPU-00000000-0000-0000-0000-000000000001"] == - doctest::Approx(99.217)); - } - - SUBCASE("nv_gpu_memory_used_bytes metric") - { - const std::string metrics_endpoint_text{R"( -# HELP nv_gpu_memory_used_bytes GPU used memory, in bytes -# TYPE nv_gpu_memory_used_bytes gauge -nv_gpu_memory_used_bytes{gpu_uuid="GPU-00000000-0000-0000-0000-000000000000"} 50000000 -nv_gpu_memory_used_bytes{gpu_uuid="GPU-00000000-0000-0000-0000-000000000001"} 75000000 - )"}; - const std::string metric_id{"nv_gpu_memory_used_bytes"}; - std::map gpu_memory_used_bytes_per_gpu{}; - - ttcb.ParseAndStoreMetric( - metrics_endpoint_text, metric_id, gpu_memory_used_bytes_per_gpu); - CHECK(gpu_memory_used_bytes_per_gpu.size() == 2); - CHECK( - gpu_memory_used_bytes_per_gpu - ["GPU-00000000-0000-0000-0000-000000000000"] == 50000000); - CHECK( - gpu_memory_used_bytes_per_gpu - ["GPU-00000000-0000-0000-0000-000000000001"] == 75000000); - } - - SUBCASE("nv_gpu_memory_total_bytes metric") - { - const std::string metrics_endpoint_text{R"( -# HELP nv_gpu_memory_total_bytes GPU total memory, in bytes -# TYPE nv_gpu_memory_total_bytes gauge -nv_gpu_memory_total_bytes{gpu_uuid="GPU-00000000-0000-0000-0000-000000000000"} 1000000000 -nv_gpu_memory_total_bytes{gpu_uuid="GPU-00000000-0000-0000-0000-000000000001"} 2000000000 - )"}; - const std::string metric_id{"nv_gpu_memory_total_bytes"}; - std::map gpu_memory_total_bytes_per_gpu{}; - - 
ttcb.ParseAndStoreMetric( - metrics_endpoint_text, metric_id, gpu_memory_total_bytes_per_gpu); - CHECK(gpu_memory_total_bytes_per_gpu.size() == 2); - CHECK( - gpu_memory_total_bytes_per_gpu - ["GPU-00000000-0000-0000-0000-000000000000"] == 1000000000); - CHECK( - gpu_memory_total_bytes_per_gpu - ["GPU-00000000-0000-0000-0000-000000000001"] == 2000000000); - } -} - -}}}} // namespace triton::perfanalyzer::clientbackend::tritonremote diff --git a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc deleted file mode 100644 index 419123e52..000000000 --- a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc +++ /dev/null @@ -1,855 +0,0 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "triton_client_backend.h" - -#include - -#include -#include - -#include "../../constants.h" -#include "../../perf_analyzer_exception.h" -#include "json_utils.h" - -namespace { - -triton::client::HttpSslOptions -ParseHttpSslOptions( - const triton::perfanalyzer::clientbackend::SslOptionsBase& ssl_options) -{ - triton::client::HttpSslOptions http_ssl_options; - - http_ssl_options.verify_peer = ssl_options.ssl_https_verify_peer; - http_ssl_options.verify_host = ssl_options.ssl_https_verify_host; - http_ssl_options.ca_info = ssl_options.ssl_https_ca_certificates_file; - if (ssl_options.ssl_https_client_certificate_type == "PEM") { - http_ssl_options.cert_type = - triton::client::HttpSslOptions::CERTTYPE::CERT_PEM; - } else if (ssl_options.ssl_https_client_certificate_type == "DER") { - http_ssl_options.cert_type = - triton::client::HttpSslOptions::CERTTYPE::CERT_DER; - } - http_ssl_options.cert = ssl_options.ssl_https_client_certificate_file; - if (ssl_options.ssl_https_private_key_type == "PEM") { - http_ssl_options.key_type = - triton::client::HttpSslOptions::KEYTYPE::KEY_PEM; - } else if (ssl_options.ssl_https_private_key_type == "DER") { - http_ssl_options.key_type = - triton::client::HttpSslOptions::KEYTYPE::KEY_DER; - } - http_ssl_options.key = ssl_options.ssl_https_private_key_file; - - return http_ssl_options; -} - -std::pair -ParseGrpcSslOptions( - const triton::perfanalyzer::clientbackend::SslOptionsBase& ssl_options) -{ - bool use_ssl = ssl_options.ssl_grpc_use_ssl; - - triton::client::SslOptions grpc_ssl_options; - grpc_ssl_options.root_certificates = - ssl_options.ssl_grpc_root_certifications_file; - grpc_ssl_options.private_key = ssl_options.ssl_grpc_private_key_file; - grpc_ssl_options.certificate_chain = - ssl_options.ssl_grpc_certificate_chain_file; - - return std::pair{use_ssl, grpc_ssl_options}; -} - -} // namespace - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritonremote { -//============================================================================== - -Error -TritonClientBackend::Create( - const std::string& url, const ProtocolType protocol, - const SslOptionsBase& ssl_options, - const std::map> trace_options, - const grpc_compression_algorithm compression_algorithm, - std::shared_ptr http_headers, const bool verbose, - const std::string& metrics_url, const TensorFormat input_tensor_format, - const TensorFormat output_tensor_format, - std::unique_ptr* client_backend) -{ - std::unique_ptr triton_client_backend( - new TritonClientBackend( - protocol, compression_algorithm, http_headers, metrics_url, - input_tensor_format, output_tensor_format)); - if (protocol == ProtocolType::HTTP) { - triton::client::HttpSslOptions http_ssl_options = - ParseHttpSslOptions(ssl_options); - RETURN_IF_TRITON_ERROR(tc::InferenceServerHttpClient::Create( - &(triton_client_backend->client_.http_client_), url, verbose, - http_ssl_options)); - if (!trace_options.empty()) { - std::string response; - RETURN_IF_TRITON_ERROR( - triton_client_backend->client_.http_client_->UpdateTraceSettings( - &response, "", trace_options)); - } - } else { - std::pair grpc_ssl_options_pair = - ParseGrpcSslOptions(ssl_options); - bool use_ssl = grpc_ssl_options_pair.first; - triton::client::SslOptions grpc_ssl_options = grpc_ssl_options_pair.second; - RETURN_IF_TRITON_ERROR(tc::InferenceServerGrpcClient::Create( - &(triton_client_backend->client_.grpc_client_), url, verbose, use_ssl, - grpc_ssl_options)); - if (!trace_options.empty()) { - 
inference::TraceSettingResponse response; - RETURN_IF_TRITON_ERROR( - triton_client_backend->client_.grpc_client_->UpdateTraceSettings( - &response, "", trace_options)); - } - } - - *client_backend = std::move(triton_client_backend); - - return Error::Success; -} - -Error -TritonClientBackend::ServerExtensions(std::set* extensions) -{ - extensions->clear(); - if (protocol_ == ProtocolType::HTTP) { - std::string server_metadata; - FAIL_IF_TRITON_ERR( - client_.http_client_->ServerMetadata(&server_metadata, *http_headers_), - "unable to get server metadata"); - - rapidjson::Document server_metadata_json; - FAIL_IF_TRITON_ERR( - tc::ParseJson(&server_metadata_json, server_metadata), - "failed to parse server metadata"); - for (const auto& extension : - server_metadata_json["extensions"].GetArray()) { - extensions->insert( - std::string(extension.GetString(), extension.GetStringLength())); - } - } else { - inference::ServerMetadataResponse server_metadata; - FAIL_IF_TRITON_ERR( - client_.grpc_client_->ServerMetadata(&server_metadata, *http_headers_), - "unable to get server metadata"); - for (const auto& extension : server_metadata.extensions()) { - extensions->insert(extension); - } - } - - return Error::Success; -} - -Error -TritonClientBackend::ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version) -{ - if (protocol_ == ProtocolType::HTTP) { - std::string metadata; - RETURN_IF_TRITON_ERROR(client_.http_client_->ModelMetadata( - &metadata, model_name, model_version, *http_headers_)); - RETURN_IF_TRITON_ERROR(tc::ParseJson(model_metadata, metadata)); - } else { - inference::ModelMetadataResponse model_metadata_proto; - RETURN_IF_TRITON_ERROR(client_.grpc_client_->ModelMetadata( - &model_metadata_proto, model_name, model_version, *http_headers_)); - - std::string metadata; - ::google::protobuf::util::JsonPrintOptions options; - options.preserve_proto_field_names = true; - options.always_print_primitive_fields = true; - ::google::protobuf::util::MessageToJsonString( - model_metadata_proto, &metadata, options); - - RETURN_IF_TRITON_ERROR(tc::ParseJson(model_metadata, metadata)); - } - - return Error::Success; -} - -Error -TritonClientBackend::ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version) -{ - if (protocol_ == ProtocolType::HTTP) { - std::string config; - RETURN_IF_TRITON_ERROR(client_.http_client_->ModelConfig( - &config, model_name, model_version, *http_headers_)); - RETURN_IF_TRITON_ERROR(tc::ParseJson(model_config, config)); - } else { - inference::ModelConfigResponse model_config_proto; - RETURN_IF_TRITON_ERROR(client_.grpc_client_->ModelConfig( - &model_config_proto, model_name, model_version, *http_headers_)); - - std::string config; - ::google::protobuf::util::JsonPrintOptions options; - options.preserve_proto_field_names = true; - options.always_print_primitive_fields = true; - ::google::protobuf::util::MessageToJsonString( - model_config_proto, &config, options); - - rapidjson::Document full_config; - RETURN_IF_TRITON_ERROR(tc::ParseJson(&full_config, config)); - model_config->CopyFrom(full_config["config"], model_config->GetAllocator()); - } - return Error::Success; -} - -Error -TritonClientBackend::Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - std::vector triton_inputs; - ParseInferInputToTriton(inputs, &triton_inputs); - - std::vector triton_outputs; - 
ParseInferRequestedOutputToTriton(outputs, &triton_outputs); - - tc::InferOptions triton_options(options.model_name_); - ParseInferOptionsToTriton(options, &triton_options); - - tc::InferResult* triton_result; - - if (protocol_ == ProtocolType::GRPC) { - RETURN_IF_TRITON_ERROR(client_.grpc_client_->Infer( - &triton_result, triton_options, triton_inputs, triton_outputs, - *http_headers_, compression_algorithm_)); - } else { - RETURN_IF_TRITON_ERROR(client_.http_client_->Infer( - &triton_result, triton_options, triton_inputs, triton_outputs, - *http_headers_)); - } - - *result = new TritonInferResult(triton_result); - - return Error::Success; -} - -Error -TritonClientBackend::AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - auto wrapped_callback = [callback](tc::InferResult* client_result) { - InferResult* result = new TritonInferResult(client_result); - callback(result); - }; - - std::vector triton_inputs; - ParseInferInputToTriton(inputs, &triton_inputs); - - std::vector triton_outputs; - ParseInferRequestedOutputToTriton(outputs, &triton_outputs); - - tc::InferOptions triton_options(options.model_name_); - ParseInferOptionsToTriton(options, &triton_options); - - if (protocol_ == ProtocolType::GRPC) { - RETURN_IF_TRITON_ERROR(client_.grpc_client_->AsyncInfer( - wrapped_callback, triton_options, triton_inputs, triton_outputs, - *http_headers_, compression_algorithm_)); - } else { - RETURN_IF_TRITON_ERROR(client_.http_client_->AsyncInfer( - wrapped_callback, triton_options, triton_inputs, triton_outputs, - *http_headers_)); - } - - return Error::Success; -} - -Error -TritonClientBackend::StartStream(OnCompleteFn callback, bool enable_stats) -{ - auto wrapped_callback = [callback](tc::InferResult* client_result) { - InferResult* result = new TritonInferResult(client_result); - callback(result); - }; - - if (protocol_ == ProtocolType::GRPC) { - RETURN_IF_TRITON_ERROR(client_.grpc_client_->StartStream( - wrapped_callback, enable_stats, 0 /* stream_timeout */, *http_headers_, - compression_algorithm_)); - } else { - return Error("HTTP does not support starting streams", pa::GENERIC_ERROR); - } - - return Error::Success; -} - -Error -TritonClientBackend::AsyncStreamInfer( - const InferOptions& options, const std::vector& inputs, - const std::vector& outputs) -{ - std::vector triton_inputs; - ParseInferInputToTriton(inputs, &triton_inputs); - - std::vector triton_outputs; - ParseInferRequestedOutputToTriton(outputs, &triton_outputs); - - tc::InferOptions triton_options(options.model_name_); - ParseInferOptionsToTriton(options, &triton_options); - - if (protocol_ == ProtocolType::GRPC) { - RETURN_IF_TRITON_ERROR(client_.grpc_client_->AsyncStreamInfer( - triton_options, triton_inputs, triton_outputs)); - } else { - return Error( - "HTTP does not support streaming inferences", pa::GENERIC_ERROR); - } - - return Error::Success; -} - -Error -TritonClientBackend::ClientInferStat(InferStat* infer_stat) -{ - tc::InferStat triton_infer_stat; - if (protocol_ == ProtocolType::GRPC) { - RETURN_IF_TRITON_ERROR( - client_.grpc_client_->ClientInferStat(&triton_infer_stat)); - } else { - RETURN_IF_TRITON_ERROR( - client_.http_client_->ClientInferStat(&triton_infer_stat)); - } - - ParseInferStat(triton_infer_stat, infer_stat); - - return Error::Success; -} - -Error -TritonClientBackend::ModelInferenceStatistics( - std::map* model_stats, - const std::string& model_name, const std::string& model_version) -{ - if (protocol_ == 
ProtocolType::GRPC) { - inference::ModelStatisticsResponse infer_stat; - RETURN_IF_TRITON_ERROR(client_.grpc_client_->ModelInferenceStatistics( - &infer_stat, model_name, model_version, *http_headers_)); - ParseStatistics(infer_stat, model_stats); - } else { - std::string infer_stat; - RETURN_IF_TRITON_ERROR(client_.http_client_->ModelInferenceStatistics( - &infer_stat, model_name, model_version, *http_headers_)); - rapidjson::Document infer_stat_json; - RETURN_IF_TRITON_ERROR(tc::ParseJson(&infer_stat_json, infer_stat)); - ParseStatistics(infer_stat_json, model_stats); - } - - return Error::Success; -} - -Error -TritonClientBackend::Metrics(triton::perfanalyzer::Metrics& metrics) -{ - try { - std::string metrics_endpoint_text{""}; - AccessMetricsEndpoint(metrics_endpoint_text); - ParseAndStoreMetrics(metrics_endpoint_text, metrics); - } - catch (const PerfAnalyzerException& e) { - return Error(e.what(), pa::GENERIC_ERROR); - } - return Error::Success; -} - -void -TritonClientBackend::AccessMetricsEndpoint(std::string& metrics_endpoint_text) -{ - CURL* curl{curl_easy_init()}; - if (curl == nullptr) { - throw triton::perfanalyzer::PerfAnalyzerException( - "Error calling curl_easy_init()", triton::perfanalyzer::GENERIC_ERROR); - } - - const auto metrics_response_handler{ - [](char* ptr, size_t size, size_t nmemb, std::string* userdata) { - userdata->append(ptr, size * nmemb); - return size * nmemb; - }}; - - curl_easy_setopt(curl, CURLOPT_URL, metrics_url_.c_str()); - curl_easy_setopt( - curl, CURLOPT_WRITEFUNCTION, - static_cast( - metrics_response_handler)); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &metrics_endpoint_text); - - CURLcode res{curl_easy_perform(curl)}; - - if (res != CURLE_OK) { - throw triton::perfanalyzer::PerfAnalyzerException( - "Unable to connect to Metrics endpoint " + metrics_url_, - triton::perfanalyzer::GENERIC_ERROR); - } - - long response_code{0}; - curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); - - if (response_code != 200) { - throw triton::perfanalyzer::PerfAnalyzerException( - "Metrics endpoint curling did not succeed.", - triton::perfanalyzer::GENERIC_ERROR); - } - - curl_easy_cleanup(curl); -} - -void -TritonClientBackend::ParseAndStoreMetrics( - const std::string& metrics_endpoint_text, - triton::perfanalyzer::Metrics& metrics) -{ - ParseAndStoreMetric( - metrics_endpoint_text, "nv_gpu_utilization", - metrics.gpu_utilization_per_gpu); - ParseAndStoreMetric( - metrics_endpoint_text, "nv_gpu_power_usage", - metrics.gpu_power_usage_per_gpu); - ParseAndStoreMetric( - metrics_endpoint_text, "nv_gpu_memory_used_bytes", - metrics.gpu_memory_used_bytes_per_gpu); - ParseAndStoreMetric( - metrics_endpoint_text, "nv_gpu_memory_total_bytes", - metrics.gpu_memory_total_bytes_per_gpu); -} - -Error -TritonClientBackend::UnregisterAllSharedMemory() -{ - if (protocol_ == ProtocolType::GRPC) { - RETURN_IF_TRITON_ERROR( - client_.grpc_client_->UnregisterSystemSharedMemory("", *http_headers_)); - RETURN_IF_TRITON_ERROR( - client_.grpc_client_->UnregisterCudaSharedMemory("", *http_headers_)); - } else { - RETURN_IF_TRITON_ERROR( - client_.http_client_->UnregisterSystemSharedMemory("", *http_headers_)); - RETURN_IF_TRITON_ERROR( - client_.http_client_->UnregisterCudaSharedMemory("", *http_headers_)); - } - - return Error::Success; -} - -Error -TritonClientBackend::RegisterSystemSharedMemory( - const std::string& name, const std::string& key, const size_t byte_size) -{ - if (protocol_ == ProtocolType::GRPC) { - 
RETURN_IF_TRITON_ERROR(client_.grpc_client_->RegisterSystemSharedMemory( - name, key, byte_size, 0 /* offset */, *http_headers_)); - - } else { - RETURN_IF_TRITON_ERROR(client_.http_client_->RegisterSystemSharedMemory( - name, key, byte_size, 0 /* offset */, *http_headers_)); - } - - return Error::Success; -} - -Error -TritonClientBackend::RegisterCudaSharedMemory( - const std::string& name, const cudaIpcMemHandle_t& handle, - const size_t byte_size) -{ - if (protocol_ == ProtocolType::GRPC) { - RETURN_IF_TRITON_ERROR(client_.grpc_client_->RegisterCudaSharedMemory( - name, handle, 0 /*device id*/, byte_size, *http_headers_)); - - } else { - RETURN_IF_TRITON_ERROR(client_.http_client_->RegisterCudaSharedMemory( - name, handle, 0 /*device id*/, byte_size, *http_headers_)); - } - - return Error::Success; -} - -// -// Shared Memory Utilities -// -Error -TritonClientBackend::CreateSharedMemoryRegion( - std::string shm_key, size_t byte_size, int* shm_fd) -{ - RETURN_IF_TRITON_ERROR( - tc::CreateSharedMemoryRegion(shm_key, byte_size, shm_fd)); - - return Error::Success; -} - - -Error -TritonClientBackend::MapSharedMemory( - int shm_fd, size_t offset, size_t byte_size, void** shm_addr) -{ - RETURN_IF_TRITON_ERROR( - tc::MapSharedMemory(shm_fd, offset, byte_size, shm_addr)); - - return Error::Success; -} - - -Error -TritonClientBackend::CloseSharedMemory(int shm_fd) -{ - RETURN_IF_TRITON_ERROR(tc::CloseSharedMemory(shm_fd)); - - return Error::Success; -} - -Error -TritonClientBackend::UnlinkSharedMemoryRegion(std::string shm_key) -{ - RETURN_IF_TRITON_ERROR(tc::UnlinkSharedMemoryRegion(shm_key)); - - return Error::Success; -} - -Error -TritonClientBackend::UnmapSharedMemory(void* shm_addr, size_t byte_size) -{ - RETURN_IF_TRITON_ERROR(tc::UnmapSharedMemory(shm_addr, byte_size)); - - return Error::Success; -} - -void -TritonClientBackend::ParseInferInputToTriton( - const std::vector& inputs, - std::vector* triton_inputs) -{ - for (const auto input : inputs) { - tc::InferInput* triton_input{dynamic_cast(input)->Get()}; - triton_input->SetBinaryData(input_tensor_format_ == TensorFormat::BINARY); - triton_inputs->push_back(triton_input); - } -} - -void -TritonClientBackend::ParseInferRequestedOutputToTriton( - const std::vector& outputs, - std::vector* triton_outputs) -{ - for (const auto output : outputs) { - tc::InferRequestedOutput* triton_output{ - dynamic_cast(output)->Get()}; - triton_output->SetBinaryData(input_tensor_format_ == TensorFormat::BINARY); - triton_outputs->push_back(triton_output); - } -} - -void -TritonClientBackend::ParseInferOptionsToTriton( - const InferOptions& options, tc::InferOptions* triton_options) -{ - triton_options->model_version_ = options.model_version_; - triton_options->request_id_ = options.request_id_; - if ((options.sequence_id_ != 0) || (options.sequence_id_str_ != "")) { - if (options.sequence_id_ != 0) { - triton_options->sequence_id_ = options.sequence_id_; - } else { - triton_options->sequence_id_str_ = options.sequence_id_str_; - } - triton_options->sequence_start_ = options.sequence_start_; - triton_options->sequence_end_ = options.sequence_end_; - } - triton_options->triton_enable_empty_final_response_ = - options.triton_enable_empty_final_response_; - - for (auto& map_entry : options.request_parameters_) { - auto rp = tc::RequestParameter(); - rp.name = map_entry.second.name; - rp.value = map_entry.second.value; - rp.type = map_entry.second.type; - triton_options->request_parameters[map_entry.first] = rp; - } -} - - -void 
-TritonClientBackend::ParseStatistics( - const inference::ModelStatisticsResponse& infer_stat, - std::map* model_stats) -{ - model_stats->clear(); - for (const auto& this_stat : infer_stat.model_stats()) { - auto it = model_stats - ->emplace( - std::make_pair(this_stat.name(), this_stat.version()), - ModelStatistics()) - .first; - it->second.inference_count_ = this_stat.inference_count(); - it->second.execution_count_ = this_stat.execution_count(); - it->second.success_count_ = this_stat.inference_stats().success().count(); - it->second.queue_count_ = this_stat.inference_stats().queue().count(); - it->second.compute_input_count_ = - this_stat.inference_stats().compute_input().count(); - it->second.compute_infer_count_ = - this_stat.inference_stats().compute_infer().count(); - it->second.compute_output_count_ = - this_stat.inference_stats().compute_output().count(); - it->second.cumm_time_ns_ = this_stat.inference_stats().success().ns(); - it->second.queue_time_ns_ = this_stat.inference_stats().queue().ns(); - it->second.compute_input_time_ns_ = - this_stat.inference_stats().compute_input().ns(); - it->second.compute_infer_time_ns_ = - this_stat.inference_stats().compute_infer().ns(); - it->second.compute_output_time_ns_ = - this_stat.inference_stats().compute_output().ns(); - it->second.cache_hit_count_ = - this_stat.inference_stats().cache_hit().count(); - it->second.cache_hit_time_ns_ = - this_stat.inference_stats().cache_hit().ns(); - it->second.cache_miss_count_ = - this_stat.inference_stats().cache_miss().count(); - it->second.cache_miss_time_ns_ = - this_stat.inference_stats().cache_miss().ns(); - } -} - -void -TritonClientBackend::ParseStatistics( - const rapidjson::Document& infer_stat, - std::map* model_stats) -{ - model_stats->clear(); - for (const auto& this_stat : infer_stat["model_stats"].GetArray()) { - auto it = model_stats - ->emplace( - std::make_pair( - this_stat["name"].GetString(), - this_stat["version"].GetString()), - ModelStatistics()) - .first; - it->second.inference_count_ = this_stat["inference_count"].GetUint64(); - it->second.execution_count_ = this_stat["execution_count"].GetUint64(); - it->second.success_count_ = - this_stat["inference_stats"]["success"]["count"].GetUint64(); - it->second.queue_count_ = - this_stat["inference_stats"]["queue"]["count"].GetUint64(); - it->second.compute_input_count_ = - this_stat["inference_stats"]["compute_input"]["count"].GetUint64(); - it->second.compute_infer_count_ = - this_stat["inference_stats"]["compute_infer"]["count"].GetUint64(); - it->second.compute_output_count_ = - this_stat["inference_stats"]["compute_output"]["count"].GetUint64(); - it->second.cumm_time_ns_ = - this_stat["inference_stats"]["success"]["ns"].GetUint64(); - it->second.queue_time_ns_ = - this_stat["inference_stats"]["queue"]["ns"].GetUint64(); - it->second.compute_input_time_ns_ = - this_stat["inference_stats"]["compute_input"]["ns"].GetUint64(); - it->second.compute_infer_time_ns_ = - this_stat["inference_stats"]["compute_infer"]["ns"].GetUint64(); - it->second.compute_output_time_ns_ = - this_stat["inference_stats"]["compute_output"]["ns"].GetUint64(); - it->second.cache_hit_count_ = - this_stat["inference_stats"]["cache_hit"]["count"].GetUint64(); - it->second.cache_hit_time_ns_ = - this_stat["inference_stats"]["cache_hit"]["ns"].GetUint64(); - it->second.cache_miss_count_ = - this_stat["inference_stats"]["cache_miss"]["count"].GetUint64(); - it->second.cache_miss_time_ns_ = - this_stat["inference_stats"]["cache_miss"]["ns"].GetUint64(); - } -} - 
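Both ParseStatistics() overloads above only accumulate raw counters and cumulative nanosecond totals per (model name, version) key; any per-request averages are left to the caller. A minimal derivation sketch, illustrative only, assuming the ModelStatistics fields populated above and a placeholder model_name/model_version lookup key:

    // Convert a cumulative nanosecond total into an average in microseconds.
    auto avg_us = [](uint64_t total_ns, uint64_t count) {
      return count == 0 ? 0.0 : (total_ns / 1000.0) / static_cast<double>(count);
    };

    const ModelStatistics& s = model_stats->at({model_name, model_version});
    double avg_queue_us = avg_us(s.queue_time_ns_, s.queue_count_);
    double avg_compute_us = avg_us(s.compute_infer_time_ns_, s.compute_infer_count_);
    double avg_request_us = avg_us(s.cumm_time_ns_, s.success_count_);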
-void -TritonClientBackend::ParseInferStat( - const tc::InferStat& triton_infer_stat, InferStat* infer_stat) -{ - infer_stat->completed_request_count = - triton_infer_stat.completed_request_count; - infer_stat->cumulative_total_request_time_ns = - triton_infer_stat.cumulative_total_request_time_ns; - infer_stat->cumulative_send_time_ns = - triton_infer_stat.cumulative_send_time_ns; - infer_stat->cumulative_receive_time_ns = - triton_infer_stat.cumulative_receive_time_ns; -} - -//============================================================================== - -Error -TritonInferInput::Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype) -{ - TritonInferInput* local_infer_input = new TritonInferInput(name, datatype); - - tc::InferInput* triton_infer_input; - RETURN_IF_TRITON_ERROR( - tc::InferInput::Create(&triton_infer_input, name, dims, datatype)); - local_infer_input->input_.reset(triton_infer_input); - - *infer_input = local_infer_input; - return Error::Success; -} - -const std::vector& -TritonInferInput::Shape() const -{ - return input_->Shape(); -} - -Error -TritonInferInput::SetShape(const std::vector& shape) -{ - RETURN_IF_TRITON_ERROR(input_->SetShape(shape)); - return Error::Success; -} - -Error -TritonInferInput::Reset() -{ - RETURN_IF_TRITON_ERROR(input_->Reset()); - return Error::Success; -} - -Error -TritonInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) -{ - RETURN_IF_TRITON_ERROR(input_->AppendRaw(input, input_byte_size)); - return Error::Success; -} - -Error -TritonInferInput::SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset) -{ - RETURN_IF_TRITON_ERROR(input_->SetSharedMemory(name, byte_size, offset)); - return Error::Success; -} - -Error -TritonInferInput::RawData(const uint8_t** buf, size_t* byte_size) -{ - RETURN_IF_TRITON_ERROR(input_->RawData(buf, byte_size)); - return Error::Success; -} - -TritonInferInput::TritonInferInput( - const std::string& name, const std::string& datatype) - : InferInput(BackendKind::TRITON, name, datatype) -{ -} - - -//============================================================================== - -Error -TritonInferRequestedOutput::Create( - InferRequestedOutput** infer_output, const std::string& name, - const size_t class_count, const std::string& datatype) -{ - TritonInferRequestedOutput* local_infer_output = - new TritonInferRequestedOutput(name, datatype); - - tc::InferRequestedOutput* triton_infer_output; - RETURN_IF_TRITON_ERROR(tc::InferRequestedOutput::Create( - &triton_infer_output, name, class_count, datatype)); - local_infer_output->output_.reset(triton_infer_output); - - *infer_output = local_infer_output; - - return Error::Success; -} - -Error -TritonInferRequestedOutput::SetSharedMemory( - const std::string& region_name, const size_t byte_size, const size_t offset) -{ - RETURN_IF_TRITON_ERROR( - output_->SetSharedMemory(region_name, byte_size, offset)); - return Error::Success; -} - - -TritonInferRequestedOutput::TritonInferRequestedOutput( - const std::string& name, const std::string& datatype) - : InferRequestedOutput(BackendKind::TRITON, name, datatype) -{ -} - -//============================================================================== - -TritonInferResult::TritonInferResult(tc::InferResult* result) -{ - result_.reset(result); -} - -Error -TritonInferResult::Id(std::string* id) const -{ - RETURN_IF_TRITON_ERROR(result_->Id(id)); - return Error::Success; -} - -Error -TritonInferResult::RequestStatus() const -{ - 
RETURN_IF_TRITON_ERROR(result_->RequestStatus()); - return Error::Success; -} - -Error -TritonInferResult::RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const -{ - RETURN_IF_TRITON_ERROR(result_->RawData(output_name, buf, byte_size)); - return Error::Success; -} - -Error -TritonInferResult::IsFinalResponse(bool* is_final_response) const -{ - RETURN_IF_TRITON_ERROR(result_->IsFinalResponse(is_final_response)); - return Error::Success; -} - -Error -TritonInferResult::IsNullResponse(bool* is_null_response) const -{ - RETURN_IF_TRITON_ERROR(result_->IsNullResponse(is_null_response)); - return Error::Success; -} - -//============================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::tritonremote diff --git a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h deleted file mode 100644 index fd48d32c2..000000000 --- a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include -#include -#include -#include -#include - -#include "../../constants.h" -#include "../../metrics.h" -#include "../../perf_utils.h" -#include "../client_backend.h" -#include "grpc_client.h" -#include "http_client.h" -#include "shm_utils.h" - -#define RETURN_IF_TRITON_ERROR(S) \ - do { \ - const tc::Error& status__ = (S); \ - if (!status__.IsOk()) { \ - return Error(status__.Message(), pa::GENERIC_ERROR); \ - } \ - } while (false) - -#define FAIL_IF_TRITON_ERR(X, MSG) \ - { \ - const tc::Error err = (X); \ - if (!err.IsOk()) { \ - std::cerr << "error: " << (MSG) << ": " << err << std::endl; \ - exit(pa::GENERIC_ERROR); \ - } \ - } - -namespace tc = triton::client; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritonremote { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestTritonClientBackend; -#endif - -//============================================================================== -/// TritonClientBackend uses triton client C++ library to communicate with -/// triton inference service. -/// -class TritonClientBackend : public ClientBackend { - public: - /// Create a triton client backend which can be used to interact with the - /// server. - /// \param url The inference server url and port. - /// \param protocol The protocol type used. - /// \param ssl_options The SSL options used with client backend. - /// \param http_headers Map of HTTP headers. The map key/value indicates - /// the header name/value. - /// \param verbose Enables the verbose mode. - /// \param metrics_url The inference server metrics url and port. - /// \param input_tensor_format The Triton inference request input tensor - /// format. - /// \param output_tensor_format The Triton inference response output tensor - /// format. - /// \param client_backend Returns a new TritonClientBackend object. - /// \return Error object indicating success or failure. 
- static Error Create( - const std::string& url, const ProtocolType protocol, - const SslOptionsBase& ssl_options, - const std::map> trace_options, - const grpc_compression_algorithm compression_algorithm, - std::shared_ptr http_headers, const bool verbose, - const std::string& metrics_url, - const cb::TensorFormat input_tensor_format, - const cb::TensorFormat output_tensor_format, - std::unique_ptr* client_backend); - - /// See ClientBackend::ServerExtensions() - Error ServerExtensions(std::set* server_extensions) override; - - /// See ClientBackend::ModelMetadata() - Error ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version) override; - - /// See ClientBackend::ModelConfig() - Error ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version) override; - - /// See ClientBackend::Infer() - Error Infer( - InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::AsyncInfer() - Error AsyncInfer( - OnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::StartStream() - Error StartStream(OnCompleteFn callback, bool enable_stats) override; - - /// See ClientBackend::AsyncStreamInfer() - Error AsyncStreamInfer( - const InferOptions& options, const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::ClientInferStat() - Error ClientInferStat(InferStat* infer_stat) override; - - /// See ClientBackend::ModelInferenceStatistics() - Error ModelInferenceStatistics( - std::map* model_stats, - const std::string& model_name = "", - const std::string& model_version = "") override; - - /// See ClientBackend::Metrics() - Error Metrics(triton::perfanalyzer::Metrics& metrics) override; - - /// See ClientBackend::UnregisterAllSharedMemory() - Error UnregisterAllSharedMemory() override; - - /// See ClientBackend::RegisterSystemSharedMemory() - Error RegisterSystemSharedMemory( - const std::string& name, const std::string& key, - const size_t byte_size) override; - - /// See ClientBackend::RegisterCudaSharedMemory() - Error RegisterCudaSharedMemory( - const std::string& name, const cudaIpcMemHandle_t& handle, - const size_t byte_size) override; - - /// See ClientBackend::CreateSharedMemoryRegion() - Error CreateSharedMemoryRegion( - std::string shm_key, size_t byte_size, int* shm_fd) override; - - /// See ClientBackend::MapSharedMemory() - Error MapSharedMemory( - int shm_fd, size_t offset, size_t byte_size, void** shm_addr) override; - - /// See ClientBackend::CloseSharedMemory() - Error CloseSharedMemory(int shm_fd) override; - - /// See ClientBackend::UnlinkSharedMemoryRegion() - Error UnlinkSharedMemoryRegion(std::string shm_key) override; - - /// See ClientBackend::UnmapSharedMemory() - Error UnmapSharedMemory(void* shm_addr, size_t byte_size) override; - - private: - TritonClientBackend( - const ProtocolType protocol, - const grpc_compression_algorithm compression_algorithm, - std::shared_ptr http_headers, const std::string& metrics_url, - const cb::TensorFormat input_tensor_format, - const cb::TensorFormat output_tensor_format) - : ClientBackend(BackendKind::TRITON), protocol_(protocol), - compression_algorithm_(compression_algorithm), - http_headers_(http_headers), metrics_url_(metrics_url), - input_tensor_format_(input_tensor_format), - 
output_tensor_format_(output_tensor_format) - { - } - - void ParseInferInputToTriton( - const std::vector& inputs, - std::vector* triton_inputs); - void ParseInferRequestedOutputToTriton( - const std::vector& outputs, - std::vector* triton_outputs); - void ParseInferOptionsToTriton( - const InferOptions& options, tc::InferOptions* triton_options); - void ParseStatistics( - const inference::ModelStatisticsResponse& infer_stat, - std::map* model_stats); - void ParseStatistics( - const rapidjson::Document& infer_stat, - std::map* model_stats); - void ParseInferStat( - const tc::InferStat& triton_infer_stat, InferStat* infer_stat); - void AccessMetricsEndpoint(std::string& metrics_endpoint_text); - void ParseAndStoreMetrics( - const std::string& metrics_endpoint_text, - triton::perfanalyzer::Metrics& metrics); - - template - void ParseAndStoreMetric( - const std::string& metrics_endpoint_text, const std::string metric_id, - std::map& metric_per_gpu) - { - std::regex metric_regex( - R"(\n)" + metric_id + R"(\{gpu_uuid\=\"([^"]+)\"\} (\d+\.?\d*))"); - std::sregex_iterator metric_regex_match_begin{std::sregex_iterator( - metrics_endpoint_text.begin(), metrics_endpoint_text.end(), - metric_regex)}; - - for (std::sregex_iterator i{metric_regex_match_begin}; - i != std::sregex_iterator(); i++) { - const std::smatch& match{*i}; - const std::string& gpu_uuid{match[1].str()}; - T metric{}; - if (std::is_same::value) { - metric = std::stod(match[2].str()); - } else if (std::is_same::value) { - metric = static_cast(std::stod(match[2].str())); - } - metric_per_gpu[gpu_uuid] = metric; - } - } - - /// Union to represent the underlying triton client belonging to one of - /// the protocols - union TritonClient { - TritonClient() - { - new (&http_client_) std::unique_ptr{}; - } - ~TritonClient() {} - - std::unique_ptr http_client_; - std::unique_ptr grpc_client_; - } client_; - - const ProtocolType protocol_{UNKNOWN}; - const grpc_compression_algorithm compression_algorithm_{GRPC_COMPRESS_NONE}; - std::shared_ptr http_headers_; - const std::string metrics_url_{""}; - const cb::TensorFormat input_tensor_format_{cb::TensorFormat::UNKNOWN}; - const cb::TensorFormat output_tensor_format_{cb::TensorFormat::UNKNOWN}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestTritonClientBackend; - - public: - TritonClientBackend() = default; -#endif -}; - -//============================================================== -/// TritonInferInput is a wrapper around InferInput object of -/// triton client library. -/// -class TritonInferInput : public InferInput { - public: - static Error Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype); - /// Returns the raw InferInput object required by triton client library. 
- tc::InferInput* Get() const { return input_.get(); } - /// See InferInput::Shape() - const std::vector& Shape() const override; - /// See InferInput::SetShape() - Error SetShape(const std::vector& shape) override; - /// See InferInput::Reset() - Error Reset() override; - /// See InferInput::AppendRaw() - Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; - /// See InferInput::SetSharedMemory() - Error SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset = 0) override; - /// See InferInput::RawData() - Error RawData(const uint8_t** buf, size_t* byte_size) override; - - private: - explicit TritonInferInput( - const std::string& name, const std::string& datatype); - - std::unique_ptr input_; -}; - -//============================================================== -/// TritonInferRequestedOutput is a wrapper around -/// InferRequestedOutput object of triton client library. -/// -class TritonInferRequestedOutput : public InferRequestedOutput { - public: - static Error Create( - InferRequestedOutput** infer_output, const std::string& name, - const size_t class_count = 0, const std::string& datatype = ""); - /// Returns the raw InferRequestedOutput object required by triton client - /// library. - tc::InferRequestedOutput* Get() const { return output_.get(); } - // See InferRequestedOutput::SetSharedMemory() - Error SetSharedMemory( - const std::string& region_name, const size_t byte_size, - const size_t offset = 0) override; - - private: - explicit TritonInferRequestedOutput( - const std::string& name, const std::string& datatype); - - std::unique_ptr output_; -}; - -//============================================================== -/// TritonInferResult is a wrapper around InferResult object of -/// triton client library. -/// -class TritonInferResult : public InferResult { - public: - explicit TritonInferResult(tc::InferResult* result); - /// See InferResult::Id() - Error Id(std::string* id) const override; - /// See InferResult::RequestStatus() - Error RequestStatus() const override; - /// See InferResult::RawData() - Error RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const override; - /// See InferResult::IsFinalResponse() - Error IsFinalResponse(bool* is_final_response) const override; - /// See InferResult::IsNullResponse() - Error IsNullResponse(bool* is_null_response) const override; - - private: - std::unique_ptr result_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::tritonremote diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/triton_c_api/CMakeLists.txt deleted file mode 100644 index 5e21b7449..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -cmake_minimum_required (VERSION 3.18) - -set( - TRITON_C_API_CLIENT_BACKEND_SRCS - triton_c_api_backend.cc - shared_library.cc - triton_loader.cc - shared_memory_manager.cc - scoped_defer.cc -) - -set( - TRITON_C_API_CLIENT_BACKEND_HDRS - triton_c_api_backend.h - shared_library.h - shared_memory_manager.h - triton_loader.h - c_api_infer_results.h - scoped_defer.h -) - -add_library( - triton-c-api-backend-library EXCLUDE_FROM_ALL OBJECT - ${TRITON_C_API_CLIENT_BACKEND_SRCS} - ${TRITON_C_API_CLIENT_BACKEND_HDRS} -) - -target_link_libraries( - triton-c-api-backend-library - grpcclient_static - httpclient_static - triton-core-serverapi # from repo-core -) diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/c_api_infer_results.h b/src/c++/perf_analyzer/client_backend/triton_c_api/c_api_infer_results.h deleted file mode 100644 index 440a94c0b..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/c_api_infer_results.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
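The ParseAndStoreMetric helper in the removed triton_client_backend header above pulls per-GPU samples out of a Prometheus-style metrics scrape with a single regex keyed on the metric name and the gpu_uuid label. The standalone sketch below shows the same extraction technique; the metric name and sample scrape text are made up for illustration and do not come from the removed code.

#include <iostream>
#include <map>
#include <regex>
#include <string>

int main()
{
  // A miniature Prometheus-style scrape, one sample line per GPU.
  const std::string metrics_text{
      "\nnv_gpu_utilization{gpu_uuid=\"GPU-aaaa\"} 0.75"
      "\nnv_gpu_utilization{gpu_uuid=\"GPU-bbbb\"} 0.10"};
  const std::string metric_id{"nv_gpu_utilization"};

  // Same shape as the regex above: capture the gpu_uuid label and the
  // numeric sample that follows it.
  std::regex metric_regex(
      R"(\n)" + metric_id + R"(\{gpu_uuid\=\"([^"]+)\"\} (\d+\.?\d*))");

  std::map<std::string, double> metric_per_gpu;
  for (std::sregex_iterator it{
           metrics_text.begin(), metrics_text.end(), metric_regex};
       it != std::sregex_iterator(); ++it) {
    metric_per_gpu[(*it)[1].str()] = std::stod((*it)[2].str());
  }

  for (const auto& kv : metric_per_gpu) {
    std::cout << kv.first << " -> " << kv.second << std::endl;
  }
  return 0;
}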
-#pragma once - -#include "common.h" - -namespace tc = triton::client; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -/// This class is used to pass inference status and id to upstream backend. -/// Created so that the API is similar to `triton, torchserver, -/// tensorflow_serving` APIs -class InferResult { - public: - static void Create( - InferResult** infer_result, const tc::Error& err, const std::string& id) - { - *infer_result = reinterpret_cast(new InferResult(err, id)); - } - - tc::Error Id(std::string* id) const - { - *id = request_id_; - return tc::Error::Success; - } - tc::Error RequestStatus() const { return status_; } - - private: - InferResult(const tc::Error& err, const std::string& id) - : status_(err), request_id_(id) - { - } - - std::string request_id_; - tc::Error status_; -}; -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/scoped_defer.cc b/src/c++/perf_analyzer/client_backend/triton_c_api/scoped_defer.cc deleted file mode 100644 index ff25eb0f4..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/scoped_defer.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
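The scoped_defer files removed below implement a small RAII "defer" helper: a stored std::function runs either when Complete() is called explicitly or, at the latest, when the object goes out of scope. As a quick illustration of how such a helper is typically used, here is a minimal sketch; the Defer class is a simplified stand-in written for this example, not the removed ScopedDefer itself.

#include <cstdio>
#include <functional>

// Simplified stand-in for an RAII defer helper.
class Defer {
 public:
  explicit Defer(std::function<void()> task) : task_(task), done_(false) {}
  ~Defer()
  {
    // Run the deferred task if it was never completed explicitly.
    if (!done_) {
      task_();
    }
  }
  void Complete()
  {
    if (!done_) {
      task_();
      done_ = true;
    }
  }

 private:
  std::function<void()> task_;
  bool done_;
};

int main()
{
  std::FILE* f = std::fopen("scratch.txt", "w");
  if (f == nullptr) {
    return 1;
  }
  // The file is closed no matter how this scope is exited.
  Defer close_file([f]() { std::fclose(f); });

  std::fputs("hello\n", f);
  close_file.Complete();  // early, explicit cleanup; the destructor is a no-op
  return 0;
}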
- -#include "scoped_defer.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { -ScopedDefer::ScopedDefer(std::function task) -{ - task_ = task; - done_ = false; -} - -void -ScopedDefer::Complete() -{ - if (!done_) { - task_(); - done_ = true; - } -} - -ScopedDefer::~ScopedDefer() -{ - if (!done_) { - task_(); - } -} - -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/scoped_defer.h b/src/c++/perf_analyzer/client_backend/triton_c_api/scoped_defer.h deleted file mode 100644 index c5fcc7ea0..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/scoped_defer.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once -#include - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -class ScopedDefer { - public: - ScopedDefer(std::function task); - ~ScopedDefer(); - void Complete(); - - private: - std::function task_; - bool done_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_library.cc b/src/c++/perf_analyzer/client_backend/triton_c_api/shared_library.cc deleted file mode 100644 index 8c06860e6..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_library.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "shared_library.h" - -#include - -#include - -/// FIXME: Duplication of server/src/core/shared_library.cc -/// Separate shared_library to common library and delete this - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -Error -OpenLibraryHandle(const std::string& path, void** handle) -{ - *handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL); - if (*handle == nullptr) { - return Error("unable to load backend library: " + std::string(dlerror())); - } - return Error::Success; -} - -Error -CloseLibraryHandle(void* handle) -{ - if (handle != nullptr) { - if (dlclose(handle) != 0) { - return Error( - "unable to unload backend library: " + std::string(dlerror())); - } - } - return Error::Success; -} - -Error -GetEntrypoint( - void* handle, const std::string& name, const bool optional, void** befn) -{ - *befn = nullptr; - dlerror(); - void* fn = dlsym(handle, name.c_str()); - const char* dlsym_error = dlerror(); - if (dlsym_error != nullptr) { - if (optional) { - return Error::Success; - } - - std::string errstr(dlsym_error); // need copy as dlclose overwrites - return Error( - "unable to find required entrypoint '" + name + - "' in backend library: " + errstr); - } - - if (fn == nullptr) { - if (optional) { - return Error::Success; - } - - return Error( - "unable to find required entrypoint '" + name + "' in backend library"); - } - - *befn = fn; - return Error::Success; -} -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_library.h b/src/c++/perf_analyzer/client_backend/triton_c_api/shared_library.h deleted file mode 100644 index dbc49e4da..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_library.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../client_backend.h" -/// FIXME: Duplication of server/src/core/shared_library.h -/// Separate shared_library to common library and delete this - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -Error OpenLibraryHandle(const std::string& path, void** handle); - -Error CloseLibraryHandle(void* handle); - -Error GetEntrypoint( - void* handle, const std::string& name, const bool optional, void** befn); - -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_memory_manager.cc b/src/c++/perf_analyzer/client_backend/triton_c_api/shared_memory_manager.cc deleted file mode 100644 index 0658daedd..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_memory_manager.cc +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "shared_memory_manager.h" - -#include -#include -#include -#include - -#include "common.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -SharedMemoryManager::~SharedMemoryManager() -{ - UnregisterAll(TRITONSERVER_MEMORY_CPU); - UnregisterAll(TRITONSERVER_MEMORY_GPU); -} - -#ifdef TRITON_ENABLE_GPU -Error -SharedMemoryManager::RegisterCUDAMemory( - const std::string& name, void* dev_ptr, const size_t byte_size, - const int device_id) -{ - // Serialize all operations that write/read current shared memory regions - std::lock_guard lock(mu_); - - // If name is already in shared_memory_map_ then return error saying already - // registered - if (shared_memory_map_.find(name) != shared_memory_map_.end()) { - return Error( - std::string("shared memory region '" + name + "' already in manager")); - } - - shared_memory_map_.insert(std::make_pair( - name, std::unique_ptr(new MemoryInfo( - name, 0 /* offset */, byte_size, dev_ptr, - TRITONSERVER_MEMORY_GPU, device_id)))); - return Error::Success; -} -#endif // TRITON_ENABLE_GPU - -Error -SharedMemoryManager::RegisterSystemMemory( - const std::string& name, void* ptr, const size_t byte_size) -{ - // Serialize all operations that write/read current shared memory regions - std::lock_guard lock(mu_); - - // If name is already in shared_memory_map_ then return error saying already - // registered - if (shared_memory_map_.find(name) != shared_memory_map_.end()) { - return Error("shared memory region '" + name + "' already in manager"); - } - - shared_memory_map_.insert(std::make_pair( - name, std::make_unique( - name, 0 /* offset */, byte_size, ptr, TRITONSERVER_MEMORY_CPU, - 0 /* device id */))); - - return Error::Success; -} - -Error -SharedMemoryManager::GetMemoryInfo( - const std::string& name, size_t offset, size_t byte_size, - void** shm_mapped_addr, TRITONSERVER_MemoryType* memory_type, - int64_t* device_id) -{ - // protect shared_memory_map_ from concurrent access - std::lock_guard lock(mu_); - - auto it = shared_memory_map_.find(name); - if (it == shared_memory_map_.end()) { - return Error( - std::string("Unable to find shared memory region: '" + name + "'")); - } - - // validate offset - size_t shm_region_end = 0; - if (it->second->kind_ == TRITONSERVER_MEMORY_CPU) { - shm_region_end = it->second->offset_; - } - if (it->second->byte_size_ > 0) { - shm_region_end += it->second->byte_size_ - 1; - } - if (offset > shm_region_end) { - return Error( - std::string("Invalid offset for shared memory region: '" + name + "'") - .c_str()); - } - // validate byte_size + offset is within memory bounds - size_t total_req_shm = offset + byte_size - 1; - if (total_req_shm > shm_region_end) { - return Error(std::string( - "Invalid offset + byte size for shared memory region: '" + - name + "'") - .c_str()); - } - - if (it->second->kind_ == TRITONSERVER_MEMORY_CPU) { - *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + - it->second->offset_ + offset); - } 
else { - *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + offset); - } - - *memory_type = it->second->kind_; - *device_id = it->second->device_id_; - - return Error::Success; -} - - -Error -SharedMemoryManager::Unregister( - const std::string& name, TRITONSERVER_MemoryType memory_type) -{ - // Serialize all operations that write/read current shared memory regions - std::lock_guard lock(mu_); - - return UnregisterHelper(name, memory_type); -} - -Error -SharedMemoryManager::UnregisterAll(TRITONSERVER_MemoryType memory_type) -{ - // Serialize all operations that write/read current shared memory regions - std::lock_guard lock(mu_); - std::string error_message = "Failed to unregister the following "; - std::vector unregister_fails; - - if (memory_type == TRITONSERVER_MEMORY_CPU) { - error_message += "system shared memory regions: "; - for (auto& it : shared_memory_map_) { - if (it.second->kind_ == TRITONSERVER_MEMORY_CPU) { - Error err = UnregisterHelper(it.first, memory_type); - if (!err.IsOk()) { - unregister_fails.push_back(it.first); - } - } - } - } else if (memory_type == TRITONSERVER_MEMORY_GPU) { - error_message += "cuda shared memory regions: "; - for (auto& it : shared_memory_map_) { - if (it.second->kind_ == TRITONSERVER_MEMORY_GPU) { - Error err = UnregisterHelper(it.first, memory_type); - if (!err.IsOk()) { - unregister_fails.push_back(it.first); - } - } - } - } - - if (!unregister_fails.empty()) { - for (auto unreg_fail : unregister_fails) { - error_message += unreg_fail + " ,"; - } - return Error(error_message); - } - - return Error::Success; -} - -Error -SharedMemoryManager::UnregisterHelper( - const std::string& name, TRITONSERVER_MemoryType memory_type) -{ - // Must hold the lock on register_mu_ while calling this function. - auto it = shared_memory_map_.find(name); - - if (it == shared_memory_map_.end()) { - return Error("Shared memory region " + name + " doesn't exist."); - } - - // Remove region information from shared_memory_map_ - shared_memory_map_.erase(it); - - return Error::Success; -} - -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_memory_manager.h b/src/c++/perf_analyzer/client_backend/triton_c_api/shared_memory_manager.h deleted file mode 100644 index 6b2082c44..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/shared_memory_manager.h +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include -#include -#include -#include - -#include "../client_backend.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -class SharedMemoryManager { - public: - SharedMemoryManager() = default; - ~SharedMemoryManager(); - -#ifdef TRITON_ENABLE_GPU - /// Add a memory block representing memory in CUDA (GPU) memory - /// to the manager. Return an Error if a memory block of the same name - /// already exists in the manager. - /// \param name The name of the memory block. - /// \param dev_ptr The device pointer - /// \param byte_size The size, in bytes of the block. - /// \param device id The GPU number the memory region is in. - /// \return an Error indicating success or failure. - Error RegisterCUDAMemory( - const std::string& name, void* dev_ptr, const size_t byte_size, - const int device_id); -#endif // TRITON_ENABLE_GPU - - /// Add a system memory block to the manager. - /// Return an Error if a shared memory block of the same name - /// already exists in the manager. - /// \param name The name of the memory block. - /// \param ptr The device pointer - /// \param byte_size The size, in bytes of the block. - /// \return an Error indicating success or failure. - Error RegisterSystemMemory( - const std::string& name, void* ptr, const size_t byte_size); - - /// Get the access information for the shared memory block - /// with the specified name. Return an Error - /// if named block doesn't exist. - /// \param name The name of the shared memory block to get. - /// \param offset The offset in the block - /// \param byte_size The byte size to request for the shm region - /// \param shm_mapped_addr Returns the pointer to the shared - /// memory block with the specified name and offset - /// \param memory_type Returns the type of the memory - /// \param device_id Returns the device id associated with the - /// memory block - /// \return an Error indicating success or failure. - Error GetMemoryInfo( - const std::string& name, size_t offset, size_t byte_size, - void** shm_mapped_addr, TRITONSERVER_MemoryType* memory_type, - int64_t* device_id); - - /// Removes the named shared memory block of the specified type from - /// the manager. Any future attempt to get the details of this block - /// will result in an array till another block with the same name is - /// added to the manager. - /// \param name The name of the shared memory block to remove. - /// \param memory_type The type of memory to unregister. - /// \return an Error indicating success or failure. - Error Unregister( - const std::string& name, TRITONSERVER_MemoryType memory_type); - - /// Unregister all shared memory blocks of specified type from the manager. - /// \param memory_type The type of memory to unregister. - /// \return an Error indicating success or failure. 
- Error UnregisterAll(TRITONSERVER_MemoryType memory_type); - - private: - /// A helper function to remove the named shared memory blocks of - /// specified type - Error UnregisterHelper( - const std::string& name, TRITONSERVER_MemoryType memory_type); - - /// A struct that records the shared memory regions registered by the shared - /// memory manager. - struct MemoryInfo { - MemoryInfo( - const std::string& name, const size_t offset, const size_t byte_size, - void* mapped_addr, const TRITONSERVER_MemoryType kind, - const int64_t device_id) - : name_(name), offset_(offset), byte_size_(byte_size), - mapped_addr_(mapped_addr), kind_(kind), device_id_(device_id) - { - } - - std::string name_; - size_t offset_; - size_t byte_size_; - void* mapped_addr_; - TRITONSERVER_MemoryType kind_; - int64_t device_id_; - }; - - using SharedMemoryStateMap = - std::map>; - - // A map between the name and the details of the associated - // shared memory block - SharedMemoryStateMap shared_memory_map_; - - // A mutex to protect the concurrent access to shared_memory_map_ - std::mutex mu_; -}; -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.cc b/src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.cc deleted file mode 100644 index e97f1ea80..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.cc +++ /dev/null @@ -1,401 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
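The SharedMemoryManager declared above keeps a mutex-guarded map from region name to base address, byte size, memory kind, and device id, and GetMemoryInfo checks that a requested offset/byte-size window stays inside the registered region. The sketch below mirrors that bookkeeping in a simplified, CPU-only form; it uses none of the TRITONSERVER types, and the region name and sizes are illustrative only.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <vector>

// CPU-only stand-in for the manager's bookkeeping: name -> {base, size}.
class SimpleShmRegistry {
 public:
  bool Register(const std::string& name, void* base, std::size_t byte_size)
  {
    std::lock_guard<std::mutex> lock(mu_);
    // Reject duplicate names, as RegisterSystemMemory does.
    return regions_.emplace(name, Region{base, byte_size}).second;
  }

  bool GetMemoryInfo(
      const std::string& name, std::size_t offset, std::size_t byte_size,
      void** addr)
  {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = regions_.find(name);
    if (it == regions_.end()) {
      return false;  // unknown region
    }
    // Validate that [offset, offset + byte_size) stays inside the region.
    if (offset + byte_size > it->second.byte_size_) {
      return false;
    }
    *addr = static_cast<std::uint8_t*>(it->second.base_) + offset;
    return true;
  }

  bool Unregister(const std::string& name)
  {
    std::lock_guard<std::mutex> lock(mu_);
    return regions_.erase(name) > 0;
  }

 private:
  struct Region {
    void* base_;
    std::size_t byte_size_;
  };
  std::map<std::string, Region> regions_;
  std::mutex mu_;
};

int main()
{
  std::vector<std::uint8_t> buffer(1024);
  SimpleShmRegistry registry;
  registry.Register("input_data", buffer.data(), buffer.size());

  void* addr = nullptr;
  if (registry.GetMemoryInfo("input_data", 128, 64, &addr)) {
    std::cout << "window at offset 128 resolved" << std::endl;
  }
  registry.Unregister("input_data");
  return 0;
}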
- -#include "triton_c_api_backend.h" - -#include "c_api_infer_results.h" -#include "json_utils.h" -#include "triton_loader.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -//============================================================================== - -Error -TritonCApiClientBackend::Create( - const std::string& triton_server_path, - const std::string& model_repository_path, const bool verbose, - std::unique_ptr* client_backend) -{ - if (triton_server_path.empty()) { - return Error( - "--triton-server-path should not be empty when using " - "service-kind=triton_c_api."); - } - - if (model_repository_path.empty()) { - return Error( - "--model-repository should not be empty when using " - "service-kind=triton_c_api."); - } - - std::unique_ptr triton_client_backend( - new TritonCApiClientBackend()); - RETURN_IF_ERROR( - TritonLoader::Create(triton_server_path, model_repository_path, verbose)); - *client_backend = std::move(triton_client_backend); - return Error::Success; -} - -Error -TritonCApiClientBackend::ServerExtensions(std::set* extensions) -{ - rapidjson::Document server_metadata_json; - RETURN_IF_ERROR(triton_loader_->ServerMetaData(&server_metadata_json)); - for (const auto& extension : server_metadata_json["extensions"].GetArray()) { - extensions->insert( - std::string(extension.GetString(), extension.GetStringLength())); - } - return Error::Success; -} - -Error -TritonCApiClientBackend::ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version) -{ - if (!triton_loader_->ModelIsLoaded()) { - triton_loader_->LoadModel(model_name, model_version); - } - RETURN_IF_ERROR(triton_loader_->ModelMetadata(model_metadata)); - return Error::Success; -} - -Error -TritonCApiClientBackend::ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version) -{ - if (!triton_loader_->ModelIsLoaded()) { - triton_loader_->LoadModel(model_name, model_version); - } - RETURN_IF_ERROR( - triton_loader_->ModelConfig(model_config, model_name, model_version)); - return Error::Success; -} - -Error -TritonCApiClientBackend::Infer( - cb::InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) -{ - std::vector triton_inputs; - ParseInferInputToTriton(inputs, &triton_inputs); - - std::vector triton_outputs; - ParseInferRequestedOutputToTriton(outputs, &triton_outputs); - - tc::InferOptions triton_options(options.model_name_); - ParseInferOptionsToTriton(options, &triton_options); - - capi::InferResult* triton_result; - RETURN_IF_ERROR(triton_loader_->Infer( - triton_options, triton_inputs, triton_outputs, &triton_result)); - - *result = new TritonCApiInferResult(triton_result); - return Error::Success; -} - - -Error -TritonCApiClientBackend::ClientInferStat(InferStat* infer_stat) -{ - tc::InferStat triton_infer_stat; - - triton_loader_->ClientInferStat(&triton_infer_stat); - ParseInferStat(triton_infer_stat, infer_stat); - return Error::Success; -} - -Error -TritonCApiClientBackend::ModelInferenceStatistics( - std::map* model_stats, - const std::string& model_name, const std::string& model_version) -{ - rapidjson::Document infer_stat_json; - RETURN_IF_ERROR(triton_loader_->ModelInferenceStatistics( - model_name, model_version, &infer_stat_json)); - ParseStatistics(infer_stat_json, model_stats); - - return Error::Success; -} - -Error -TritonCApiClientBackend::UnregisterAllSharedMemory() -{ - 
RETURN_IF_ERROR(triton_loader_->UnregisterAllSharedMemory()); - return Error::Success; -} - -Error -TritonCApiClientBackend::RegisterSystemMemory( - const std::string& name, void* ptr, const size_t byte_size) -{ - RETURN_IF_ERROR(triton_loader_->RegisterSystemMemory(name, ptr, byte_size)); - return Error::Success; -} - -#ifdef TRITON_ENABLE_GPU -Error -TritonCApiClientBackend::RegisterCudaMemory( - const std::string& name, void* handle, const size_t byte_size) -{ - RETURN_IF_ERROR(triton_loader_->RegisterCudaMemory(name, handle, byte_size)); - return Error::Success; -} -#endif // TRITON_ENABLE_GPU - -void -TritonCApiClientBackend::ParseInferInputToTriton( - const std::vector& inputs, - std::vector* triton_inputs) -{ - for (const auto input : inputs) { - triton_inputs->push_back( - (dynamic_cast(input))->Get()); - } -} - -void -TritonCApiClientBackend::ParseInferRequestedOutputToTriton( - const std::vector& outputs, - std::vector* triton_outputs) -{ - for (const auto output : outputs) { - triton_outputs->push_back( - (dynamic_cast(output))->Get()); - } -} - -void -TritonCApiClientBackend::ParseInferOptionsToTriton( - const InferOptions& options, tc::InferOptions* triton_options) -{ - triton_options->model_version_ = options.model_version_; - triton_options->request_id_ = options.request_id_; - if ((options.sequence_id_ != 0) || (options.sequence_id_str_ != "")) { - if (options.sequence_id_ != 0) { - triton_options->sequence_id_ = options.sequence_id_; - } else { - triton_options->sequence_id_str_ = options.sequence_id_str_; - } - triton_options->sequence_start_ = options.sequence_start_; - triton_options->sequence_end_ = options.sequence_end_; - } -} - -void -TritonCApiClientBackend::ParseStatistics( - const rapidjson::Document& infer_stat, - std::map* model_stats) -{ - model_stats->clear(); - for (const auto& this_stat : infer_stat["model_stats"].GetArray()) { - auto it = model_stats - ->emplace( - std::make_pair( - this_stat["name"].GetString(), - this_stat["version"].GetString()), - ModelStatistics()) - .first; - it->second.inference_count_ = this_stat["inference_count"].GetUint64(); - it->second.execution_count_ = this_stat["execution_count"].GetUint64(); - it->second.success_count_ = - this_stat["inference_stats"]["success"]["count"].GetUint64(); - it->second.queue_count_ = - this_stat["inference_stats"]["queue"]["count"].GetUint64(); - it->second.compute_input_count_ = - this_stat["inference_stats"]["compute_input"]["count"].GetUint64(); - it->second.compute_infer_count_ = - this_stat["inference_stats"]["compute_infer"]["count"].GetUint64(); - it->second.compute_output_count_ = - this_stat["inference_stats"]["compute_output"]["count"].GetUint64(); - it->second.cumm_time_ns_ = - this_stat["inference_stats"]["success"]["ns"].GetUint64(); - it->second.queue_time_ns_ = - this_stat["inference_stats"]["queue"]["ns"].GetUint64(); - it->second.compute_input_time_ns_ = - this_stat["inference_stats"]["compute_input"]["ns"].GetUint64(); - it->second.compute_infer_time_ns_ = - this_stat["inference_stats"]["compute_infer"]["ns"].GetUint64(); - it->second.compute_output_time_ns_ = - this_stat["inference_stats"]["compute_output"]["ns"].GetUint64(); - it->second.cache_hit_count_ = - this_stat["inference_stats"]["cache_hit"]["count"].GetUint64(); - it->second.cache_hit_time_ns_ = - this_stat["inference_stats"]["cache_hit"]["ns"].GetUint64(); - it->second.cache_miss_count_ = - this_stat["inference_stats"]["cache_miss"]["count"].GetUint64(); - it->second.cache_miss_time_ns_ = - 
this_stat["inference_stats"]["cache_miss"]["ns"].GetUint64(); - } -} - -void -TritonCApiClientBackend::ParseInferStat( - const tc::InferStat& triton_infer_stat, InferStat* infer_stat) -{ - infer_stat->completed_request_count = - triton_infer_stat.completed_request_count; - infer_stat->cumulative_total_request_time_ns = - triton_infer_stat.cumulative_total_request_time_ns; - infer_stat->cumulative_send_time_ns = - triton_infer_stat.cumulative_send_time_ns; - infer_stat->cumulative_receive_time_ns = - triton_infer_stat.cumulative_receive_time_ns; -} - -//============================================================================== - -Error -TritonCApiInferInput::Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype) -{ - TritonCApiInferInput* local_infer_input = - new TritonCApiInferInput(name, datatype); - - tc::InferInput* triton_infer_input; - RETURN_IF_TRITON_ERROR( - tc::InferInput::Create(&triton_infer_input, name, dims, datatype)); - local_infer_input->input_.reset(triton_infer_input); - - *infer_input = local_infer_input; - return Error::Success; -} - -const std::vector& -TritonCApiInferInput::Shape() const -{ - return input_->Shape(); -} - -Error -TritonCApiInferInput::SetShape(const std::vector& shape) -{ - RETURN_IF_TRITON_ERROR(input_->SetShape(shape)); - return Error::Success; -} - -Error -TritonCApiInferInput::Reset() -{ - RETURN_IF_TRITON_ERROR(input_->Reset()); - return Error::Success; -} - -Error -TritonCApiInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) -{ - RETURN_IF_TRITON_ERROR(input_->AppendRaw(input, input_byte_size)); - return Error::Success; -} - -Error -TritonCApiInferInput::SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset) -{ - RETURN_IF_TRITON_ERROR(input_->SetSharedMemory(name, byte_size, offset)); - return Error::Success; -} - -TritonCApiInferInput::TritonCApiInferInput( - const std::string& name, const std::string& datatype) - : InferInput(BackendKind::TRITON_C_API, name, datatype) -{ -} - - -//============================================================================== - -Error -TritonCApiInferRequestedOutput::Create( - InferRequestedOutput** infer_output, const std::string& name, - const size_t class_count, const std::string& datatype) -{ - TritonCApiInferRequestedOutput* local_infer_output = - new TritonCApiInferRequestedOutput(name); - - tc::InferRequestedOutput* triton_infer_output; - RETURN_IF_TRITON_ERROR(tc::InferRequestedOutput::Create( - &triton_infer_output, name, class_count, datatype)); - local_infer_output->output_.reset(triton_infer_output); - - *infer_output = local_infer_output; - - return Error::Success; -} - -Error -TritonCApiInferRequestedOutput::SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset) -{ - RETURN_IF_TRITON_ERROR(output_->SetSharedMemory(name, byte_size, offset)); - return Error::Success; -} - -TritonCApiInferRequestedOutput::TritonCApiInferRequestedOutput( - const std::string& name) - : InferRequestedOutput(BackendKind::TRITON_C_API, name) -{ -} - -//============================================================================== - -TritonCApiInferResult::TritonCApiInferResult(capi::InferResult* result) -{ - result_.reset(result); -} - -Error -TritonCApiInferResult::Id(std::string* id) const -{ - RETURN_IF_TRITON_ERROR(result_->Id(id)); - return Error::Success; -} - -Error -TritonCApiInferResult::RequestStatus() const -{ - RETURN_IF_TRITON_ERROR(result_->RequestStatus()); - return Error::Success; 
-} - -Error -TritonCApiInferResult::RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const -{ - return Error( - "Output retrieval is not currently supported for Triton C API client " - "backend"); -} - -//============================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.h b/src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.h deleted file mode 100644 index 0f9f5defe..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.h +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "../client_backend.h" -#include "shared_memory_manager.h" -#include "triton_loader.h" - -#define RETURN_IF_TRITON_ERROR(S) \ - do { \ - const tc::Error& status__ = (S); \ - if (!status__.IsOk()) { \ - return Error(status__.Message()); \ - } \ - } while (false) - -#define FAIL_IF_TRITON_ERR(X, MSG) \ - { \ - const tc::Error err = (X); \ - if (!err.IsOk()) { \ - std::cerr << "error: " << (MSG) << ": " << err << std::endl; \ - exit(1); \ - } \ - } - -namespace tc = triton::client; -namespace cb = triton::perfanalyzer::clientbackend; -namespace capi = triton::perfanalyzer::clientbackend::tritoncapi; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -class InferResult; - -//============================================================================== -/// TritonCApiClientBackend uses triton client C++ library to communicate with -/// triton inference service. This uses the local C++ library -/// -class TritonCApiClientBackend : public ClientBackend { - public: - /// Create a triton client backend which can be used to interact with the - /// server. - /// \param triton_server_path Tritonserver library that contains - /// lib/libtritonserver.so. 
- /// \param model_repository_path The model repository. - /// \param verbose Enables the verbose mode of TritonServer. - /// \param client_backend Returns a new TritonCApiClientBackend object. - /// \return Error object indicating success - /// or failure. - static Error Create( - const std::string& triton_server_path, - const std::string& model_repository_path, const bool verbose, - std::unique_ptr* client_backend); - - ~TritonCApiClientBackend() { triton_loader_->Delete(); } - - /// See ClientBackend::ServerExtensions() - Error ServerExtensions(std::set* server_extensions) override; - - /// See ClientBackend::ModelMetadata() - Error ModelMetadata( - rapidjson::Document* model_metadata, const std::string& model_name, - const std::string& model_version) override; - - /// See ClientBackend::ModelConfig() - Error ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version) override; - - /// See ClientBackend::Infer() - Error Infer( - cb::InferResult** result, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) override; - - /// See ClientBackend::ClientInferStat() - Error ClientInferStat(InferStat* infer_stat) override; - - /// See ClientBackend::ModelInferenceStatistics() - Error ModelInferenceStatistics( - std::map* model_stats, - const std::string& model_name = "", - const std::string& model_version = "") override; - -#ifdef TRITON_ENABLE_GPU - /// See ClientBackend::RegisterCudaMemory - Error RegisterCudaMemory( - const std::string& name, void* handle, const size_t byte_size) override; -#endif // TRITON_ENABLE_GPU - - /// See ClientBackend::RegisterSystemMemory - Error RegisterSystemMemory( - const std::string& name, void* ptr, const size_t byte_size) override; - - /// See ClientBackend::UnregisterAllSharedMemory - Error UnregisterAllSharedMemory(); - - private: - TritonCApiClientBackend() - : ClientBackend(BackendKind::TRITON_C_API), - triton_loader_(TritonLoader::GetSingleton()) - { - } - void ParseInferInputToTriton( - const std::vector& inputs, - std::vector* triton_inputs); - void ParseInferRequestedOutputToTriton( - const std::vector& outputs, - std::vector* triton_outputs); - void ParseInferOptionsToTriton( - const InferOptions& options, tc::InferOptions* triton_options); - void ParseStatistics( - const rapidjson::Document& infer_stat, - std::map* model_stats); - void ParseInferStat( - const tc::InferStat& triton_infer_stat, InferStat* infer_stat); - TritonLoader* triton_loader_; -}; - -//============================================================== -/// TritonCApiInferInput is a wrapper around InferInput object of -/// triton client library. -/// -class TritonCApiInferInput : public InferInput { - public: - static Error Create( - InferInput** infer_input, const std::string& name, - const std::vector& dims, const std::string& datatype); - - /// Returns the raw InferInput object required by triton client library. 
- tc::InferInput* Get() const { return input_.get(); } - - /// See InferInput::Shape() - const std::vector& Shape() const override; - - /// See InferInput::SetShape() - Error SetShape(const std::vector& shape) override; - - /// See InferInput::Reset() - Error Reset() override; - - /// See InferInput::AppendRaw() - Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; - - /// See InferInput::SetSharedMemory() - Error SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset = 0) override; - - private: - explicit TritonCApiInferInput( - const std::string& name, const std::string& datatype); - - std::unique_ptr input_; -}; - -//============================================================== -/// TritonCApiInferRequestedOutput is a wrapper around -/// InferRequestedOutput object of triton client library. -/// -class TritonCApiInferRequestedOutput : public InferRequestedOutput { - public: - static Error Create( - InferRequestedOutput** infer_output, const std::string& name, - const size_t class_count = 0, const std::string& datatype = ""); - /// Returns the raw InferRequestedOutput object required by triton client - /// library. - tc::InferRequestedOutput* Get() const { return output_.get(); } - - /// See InferInput::SetSharedMemory() - Error SetSharedMemory( - const std::string& name, size_t byte_size, size_t offset = 0) override; - - private: - explicit TritonCApiInferRequestedOutput(const std::string& name); - - std::unique_ptr output_; -}; - -//============================================================== -/// TritonCApiInferResult is a wrapper around InferResult object of -/// the C API library. -/// -class TritonCApiInferResult : public cb::InferResult { - public: - explicit TritonCApiInferResult(capi::InferResult* result); - /// See InferResult::Id() - Error Id(std::string* id) const override; - /// See InferResult::RequestStatus() - Error RequestStatus() const override; - /// See InferResult::RawData() - Error RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const override; - - private: - std::unique_ptr result_; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.cc b/src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.cc deleted file mode 100644 index 35f7657f3..000000000 --- a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.cc +++ /dev/null @@ -1,1274 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#define TRITON_INFERENCE_SERVER_CLIENT_CLASS \ - triton::perfanalyzer::clientbackend::tritoncapi::TritonLoader - -#include "triton_loader.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "c_api_infer_results.h" -#include "scoped_defer.h" - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { -namespace { - -struct AllocPayload { - struct OutputInfo { - enum Kind { BINARY, SHM }; - - Kind kind_; - void* base_; - uint64_t byte_size_; - TRITONSERVER_MemoryType memory_type_; - int64_t device_id_; - - // For shared memory - OutputInfo( - void* base, uint64_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t device_id) - : kind_(SHM), base_(base), byte_size_(byte_size), - memory_type_(memory_type), device_id_(device_id) - { - } - }; - - ~AllocPayload() - { - for (auto it : output_map_) { - delete it.second; - } - } - - std::unordered_map output_map_; -}; - -bool helper_verbose = false; -/// Helper function for allocating memory -TRITONSERVER_Error* -ResponseAlloc( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, - int64_t* actual_memory_type_id) -{ - // Initially attempt to make the actual memory type and id that we - // allocate be the same as preferred memory type - *actual_memory_type = preferred_memory_type; - *actual_memory_type_id = preferred_memory_type_id; - - // This variable indicates whether the buffer should be freed or not. - bool* should_free = new bool; - *buffer_userp = should_free; - *should_free = false; - - // If 'byte_size' is zero just return 'buffer' == nullptr, we don't - // need to do any other book-keeping. 
- if (byte_size == 0) { - *buffer = nullptr; - *buffer_userp = nullptr; - if (helper_verbose) { - std::cout << "allocated " << byte_size << " bytes for result tensor " - << tensor_name << std::endl; - } - } else { - AllocPayload* alloc_payload = reinterpret_cast(userp); - auto output_map_it = alloc_payload->output_map_.find(tensor_name); - if (output_map_it == alloc_payload->output_map_.end()) { - void* allocated_ptr = nullptr; - *actual_memory_type = TRITONSERVER_MEMORY_CPU; - *actual_memory_type_id = 0; - allocated_ptr = malloc(byte_size); - *should_free = true; - - if (allocated_ptr != nullptr) { - *buffer = allocated_ptr; - } - } else { - // It is in shared memory - AllocPayload::OutputInfo* output_info = output_map_it->second; - if (byte_size > output_info->byte_size_) { - return TritonLoader::GetSingleton()->ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - "shared memory size specified with the request for output '" + - std::string(tensor_name) + "' (" + - std::to_string(output_info->byte_size_) + - " bytes) should be at least " + std::to_string(byte_size) + - " bytes to hold the results") - .c_str()); - } - *actual_memory_type = output_info->memory_type_; - *actual_memory_type_id = output_info->device_id_; - *buffer = output_info->base_; - } - } - - return nullptr; // Success -} - -/// Helper function for releasing memory -TRITONSERVER_Error* -ResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - bool* should_free = reinterpret_cast(buffer_userp); - switch (memory_type) { - case TRITONSERVER_MEMORY_CPU: - if (*should_free) { - free(buffer); - } - break; - } - - free(should_free); - return nullptr; // Success -} - -void -InferRequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) -{ - TritonLoader::GetSingleton()->DeleteInferRequest(request); -} - - -void -InferResponseComplete( - TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp) -{ - if (response != nullptr) { - // Send 'response' to the future. 
- std::promise* p = - reinterpret_cast*>(userp); - p->set_value(response); - delete p; - } -} - -Error -GetModelVersionFromString(const std::string& version_string, int64_t* version) -{ - if (version_string.empty()) { - *version = 1; - return Error::Success; - } - - try { - *version = std::stol(version_string); - } - catch (std::exception& e) { - return Error( - std::string( - "Failed to get model version from specified version string '" + - version_string + "' (details: " + e.what() + - "), version should be an integral value > 0") - .c_str()); - } - - if (*version < 0) { - return Error(std::string( - "invalid model version specified '" + version_string + - "' , version should be an integral value > 0") - .c_str()); - } - - return Error::Success; -} - -Error -FolderExists(const std::string& path) -{ - struct stat buffer; - if (!stat(path.c_str(), &buffer)) { - return Error::Success; - } else { - return Error("Unable to find filepath: " + path); - } -} -} // namespace - -Error -TritonLoader::Create( - const std::string& triton_server_path, - const std::string& model_repository_path, bool verbose) -{ - if (!GetSingleton()->ServerIsReady()) { - GetSingleton()->ClearHandles(); - RETURN_IF_ERROR(GetSingleton()->PopulateInternals( - triton_server_path, model_repository_path, verbose)); - RETURN_IF_ERROR(GetSingleton()->LoadServerLibrary()); - RETURN_IF_ERROR(GetSingleton()->StartTriton()); - } - - return Error::Success; -} - -Error -TritonLoader::Delete() -{ - if (server_ != nullptr) { - server_is_ready_ = false; - model_is_loaded_ = false; - server_.reset(); - } - return Error::Success; -} - -Error -TritonLoader::PopulateInternals( - const std::string& triton_server_path, - const std::string& model_repository_path, bool verbose) -{ - RETURN_IF_ERROR(FolderExists(triton_server_path)); - RETURN_IF_ERROR(FolderExists(model_repository_path)); - - triton_server_path_ = triton_server_path; - model_repository_path_ = model_repository_path; - verbose_ = verbose; - verbose_level_ = verbose_ ? 1 : 0; - return Error::Success; -} - -Error -TritonLoader::StartTriton() -{ - // Check API version. - uint32_t api_version_major, api_version_minor; - REPORT_TRITONSERVER_ERROR( - api_version_fn_(&api_version_major, &api_version_minor)); - if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) || - (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) { - std::stringstream sstream; - sstream << "triton server API version mismatch. \n" - << "Expected version major:" << TRITONSERVER_API_VERSION_MAJOR - << ", minor:" << TRITONSERVER_API_VERSION_MINOR << "\n" - << " Actual version major:" << api_version_major - << ", minor:" << api_version_minor; - return Error(sstream.str()); - } - // Create the server... 
-  TRITONSERVER_ServerOptions* server_options = nullptr;
-  RETURN_IF_TRITONSERVER_ERROR(
-      options_new_fn_(&server_options), "creating server options");
-  RETURN_IF_TRITONSERVER_ERROR(
-      options_set_model_repo_path_fn_(
-          server_options, model_repository_path_.c_str()),
-      "setting model repository path");
-  RETURN_IF_TRITONSERVER_ERROR(
-      set_cuda_memory_pool_byte_size_(server_options, 0, 1073741824),
-      "setting cuda memory pool byte size failed.");
-  RETURN_IF_TRITONSERVER_ERROR(
-      set_log_verbose_fn_(server_options, verbose_level_),
-      "setting verbose logging level");
-  RETURN_IF_TRITONSERVER_ERROR(
-      set_log_info_fn_(server_options, verbose_),
-      "setting if log verbose level is true");
-  RETURN_IF_TRITONSERVER_ERROR(
-      set_backend_directory_fn_(
-          server_options, (triton_server_path_ + "/backends").c_str()),
-      "setting backend directory");
-  RETURN_IF_TRITONSERVER_ERROR(
-      set_repo_agent_directory_fn_(
-          server_options, (triton_server_path_ + "/repoagents").c_str()),
-      "setting repository agent directory");
-  RETURN_IF_TRITONSERVER_ERROR(
-      set_strict_model_config_fn_(server_options, true),
-      "setting strict model configuration");
-  double min_compute_capability = 0;
-  // FIXME: Do not have GPU support right now
-  RETURN_IF_TRITONSERVER_ERROR(
-      set_min_supported_compute_capability_fn_(
-          server_options, min_compute_capability),
-      "setting minimum supported CUDA compute capability");
-  TRITONSERVER_Server* server_ptr = nullptr;
-  RETURN_IF_TRITONSERVER_ERROR(
-      server_new_fn_(&server_ptr, server_options), "creating server");
-  RETURN_IF_TRITONSERVER_ERROR(
-      server_options_delete_fn_(server_options), "deleting server options");
-  std::shared_ptr<TRITONSERVER_Server> shared_server(
-      server_ptr, server_delete_fn_);
-  server_ = shared_server;
-
-  // Wait until the server is both live and ready.
-  size_t health_iters = 0;
-  while (true) {
-    bool live, ready;
-    RETURN_IF_TRITONSERVER_ERROR(
-        server_is_live_fn_(server_.get(), &live),
-        "unable to get server liveness");
-    RETURN_IF_TRITONSERVER_ERROR(
-        server_is_ready_fn_(server_.get(), &ready),
-        "unable to get server readiness");
-    if (live && ready) {
-      server_is_ready_ = true;
-      break;
-    }
-
-    if (++health_iters >= 10) {
-      return Error("failed to find healthy inference server");
-    }
-
-    std::this_thread::sleep_for(std::chrono::milliseconds(500));
-  }
-  // Print status of the server.
- if (verbose_) { - TRITONSERVER_Message* server_metadata_message; - RETURN_IF_TRITONSERVER_ERROR( - server_metadata_fn_(server_.get(), &server_metadata_message), - "unable to get server metadata message"); - const char* buffer; - size_t byte_size; - RETURN_IF_TRITONSERVER_ERROR( - message_serialize_to_json_fn_( - server_metadata_message, &buffer, &byte_size), - "unable to serialize server metadata message"); - - RETURN_IF_TRITONSERVER_ERROR( - message_delete_fn_(server_metadata_message), - "deleting status metadata"); - } - - return Error::Success; -} - -Error -TritonLoader::ServerMetaData(rapidjson::Document* server_metadata) -{ - if (!ServerIsReady()) { - return Error("Model is not loaded and/or server is not ready"); - } - TRITONSERVER_Message* server_metadata_message; - RETURN_IF_TRITONSERVER_ERROR( - server_metadata_fn_(server_.get(), &server_metadata_message), - "unable to get server metadata message"); - const char* buffer; - size_t byte_size; - RETURN_IF_TRITONSERVER_ERROR( - message_serialize_to_json_fn_( - server_metadata_message, &buffer, &byte_size), - "unable to serialize server metadata message"); - server_metadata->Parse(buffer, byte_size); - if (server_metadata->HasParseError()) { - return Error( - "error: failed to parse server metadata from JSON: " + - std::string(GetParseError_En(server_metadata->GetParseError())) + - " at " + std::to_string(server_metadata->GetErrorOffset())); - } - RETURN_IF_TRITONSERVER_ERROR( - message_delete_fn_(server_metadata_message), "deleting status metadata"); - return Error::Success; -} - -Error -TritonLoader::LoadModel( - const std::string& model_name, const std::string& model_version) -{ - if (!ServerIsReady()) { - return Error("server is not ready, abort!"); - } - model_name_ = model_name; - - RETURN_IF_ERROR(GetModelVersionFromString(model_version, &model_version_)); - // Wait for the model to become available. 
- bool is_ready = false; - size_t health_iters = 0; - - // some error handling - if (model_repository_path_.empty()) { - return Error("Need to specify model repository"); - } - while (!is_ready) { - RETURN_IF_TRITONSERVER_ERROR( - model_is_ready_fn_( - server_.get(), model_name_.c_str(), model_version_, &is_ready), - "unable to get model readiness"); - if (!is_ready) { - if (++health_iters >= 10) { - return Error("model failed to be ready in 10 iterations"); - } - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - continue; - } - } - // flag to confirm model is correct and loaded - model_is_loaded_ = true; - return Error::Success; -} - -Error -TritonLoader::ModelMetadata(rapidjson::Document* model_metadata) -{ - if (!ModelIsLoaded() || !ServerIsReady()) { - return Error("Model is not loaded and/or server is not ready"); - } - TRITONSERVER_Message* model_metadata_message; - - // get model metadata - RETURN_IF_TRITONSERVER_ERROR( - model_metadata_fn_( - server_.get(), model_name_.c_str(), model_version_, - &model_metadata_message), - "unable to get model metadata message"); - const char* buffer; - size_t byte_size; - RETURN_IF_TRITONSERVER_ERROR( - message_serialize_to_json_fn_( - model_metadata_message, &buffer, &byte_size), - "unable to serialize model status protobuf"); - - model_metadata->Parse(buffer, byte_size); - if (model_metadata->HasParseError()) { - return Error( - "error: failed to parse model metadata from JSON: " + - std::string(GetParseError_En(model_metadata->GetParseError())) + - " at " + std::to_string(model_metadata->GetErrorOffset())); - } - - RETURN_IF_TRITONSERVER_ERROR( - message_delete_fn_(model_metadata_message), "deleting status protobuf"); - - if (strcmp((*model_metadata)["name"].GetString(), model_name_.c_str())) { - return Error("unable to find metadata for model"); - } - - bool found_version = false; - if (model_metadata->HasMember("versions")) { - for (const auto& version : (*model_metadata)["versions"].GetArray()) { - if (strcmp(version.GetString(), std::to_string(model_version_).c_str()) == - 0) { - found_version = true; - break; - } - } - } - if (!found_version) { - std::string msg = "unable to find version " + - std::to_string(model_version_) + " status for model"; - return Error(msg); - } - return Error::Success; -} - -Error -TritonLoader::ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version) -{ - if (!ModelIsLoaded() || !ServerIsReady()) { - return Error("Model is not loaded and/or server is not ready"); - } - TRITONSERVER_Message* model_config_message; - uint32_t config_version = 1; - RETURN_IF_TRITONSERVER_ERROR( - model_config_fn_( - (server_).get(), model_name.c_str(), model_version_, config_version, - &model_config_message), - "unable to get model config message"); - const char* buffer; - size_t byte_size; - RETURN_IF_TRITONSERVER_ERROR( - message_serialize_to_json_fn_(model_config_message, &buffer, &byte_size), - "unable to serialize model config status protobuf"); - - model_config->Parse(buffer, byte_size); - if (model_config->HasParseError()) { - return Error( - "error: failed to parse model config from JSON: " + - std::string(GetParseError_En(model_config->GetParseError())) + " at " + - std::to_string(model_config->GetErrorOffset())); - } - - RETURN_IF_TRITONSERVER_ERROR( - message_delete_fn_(model_config_message), - "deleting server config status protobuf"); - - return Error::Success; -} - -Error -TritonLoader::LoadServerLibrary() -{ - std::string full_path = 
triton_server_path_ + server_library_path_; - RETURN_IF_ERROR(FolderExists(full_path)); - RETURN_IF_ERROR(OpenLibraryHandle(full_path, &dlhandle_)); - - TritonServerApiVersionFn_t apifn; - TritonServerOptionsNewFn_t onfn; - TritonServerOptionSetModelRepoPathFn_t rpfn; - TritonServerSetLogVerboseFn_t slvfn; - - TritonServerSetBackendDirFn_t sbdfn; - TritonServerSetRepoAgentDirFn_t srdfn; - TritonServerSetStrictModelConfigFn_t ssmcfn; - TritonServerSetMinSupportedComputeCapabilityFn_t smsccfn; - - TritonServerNewFn_t snfn; - TritonServerOptionsDeleteFn_t odfn; - TritonServerDeleteFn_t sdfn; - TritonServerIsLiveFn_t ilfn; - - TritonServerIsReadyFn_t irfn; - TritonServerMetadataFn_t smfn; - TritonServerMessageSerializeToJsonFn_t stjfn; - TritonServerMessageDeleteFn_t mdfn; - - TritonServerModelIsReadyFn_t mirfn; - TritonServerModelMetadataFn_t mmfn; - TritonServerResponseAllocatorNewFn_t ranfn; - TritonServerInferenceRequestNewFn_t irnfn; - - TritonServerInferenceRequestSetIdFn_t irsifn; - TritonServerInferenceRequestSetReleaseCallbackFn_t irsrcfn; - TritonServerInferenceRequestAddInputFn_t iraifn; - TritonServerInferenceRequestAddRequestedOutputFn_t irarofn; - - TritonServerInferenceRequestAppendInputDataFn_t iraidfn; - TritonServerInferenceRequestSetResponseCallbackFn_t irsrescfn; - TritonServerInferAsyncFn_t iafn; - TritonServerInferenceResponseErrorFn_t irefn; - - TritonServerInferenceResponseDeleteFn_t irdfn; - TritonServerResponseAllocatorDeleteFn_t radfn; - TritonServerErrorNewFn_t enfn; - - TritonServerMemoryTypeStringFn_t mtsfn; - TritonServerInferenceResponseOutputCountFn_t irocfn; - TritonServerDataTypeStringFn_t dtsfn; - - TritonServerErrorDeleteFn_t edfn; - TritonServerErrorCodeToStringFn_t ectsfn; - TritonServerErrorMessageFn_t emfn; - TritonServerModelConfigFn_t mcfn; - TritonServerInferenceRequestSetCorrelationIdFn_t scidfn; - TritonServerInferenceRequestSetStringCorrelationIdFn_t sscidfn; - - TritonServerInferenceRequestSetFlagsFn_t sffn; - TritonServerInferenceRequestSetPriorityFn_t spfn; - TritonServerInferenceRequestSetTimeoutMicrosecondsFn_t stmsfn; - TritonServerStringToDatatypeFn_t stdtfn; - - TritonServerInferenceResponseOutputFn_t irofn; - TritonServerRequestIdFn_t ridfn; - TritonServerRequestDeleteFn_t rdfn; - TritonServerModelStatisticsFn_t msfn; - - TritonSeverUnloadModelFn_t umfn; - TritonSeverSetLogInfoFn_t slifn; - TritonServerSetCudaMemoryPoolByteSizeFn_t scmpbsfn; - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ApiVersion", false /* optional */, - reinterpret_cast(&apifn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsNew", false /* optional */, - reinterpret_cast(&onfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetModelRepositoryPath", - false /* optional */, reinterpret_cast(&rpfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetLogVerbose", - false /* optional */, reinterpret_cast(&slvfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetBackendDirectory", - false /* optional */, reinterpret_cast(&sbdfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetRepoAgentDirectory", - false /* optional */, reinterpret_cast(&srdfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetStrictModelConfig", - false /* optional */, reinterpret_cast(&ssmcfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability", - false /* 
optional */, reinterpret_cast(&smsccfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize", - false /* optional */, reinterpret_cast(&scmpbsfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerNew", false /* optional */, - reinterpret_cast(&snfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsDelete", false /* optional */, - reinterpret_cast(&odfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerDelete", false /* optional */, - reinterpret_cast(&sdfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerIsLive", false /* optional */, - reinterpret_cast(&ilfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerIsReady", false /* optional */, - reinterpret_cast(&irfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerMetadata", false /* optional */, - reinterpret_cast(&smfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_MessageSerializeToJson", false /* optional */, - reinterpret_cast(&stjfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_MessageDelete", false /* optional */, - reinterpret_cast(&mdfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerModelIsReady", false /* optional */, - reinterpret_cast(&mirfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerModelMetadata", false /* optional */, - reinterpret_cast(&mmfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ResponseAllocatorNew", false /* optional */, - reinterpret_cast(&ranfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestNew", false /* optional */, - reinterpret_cast(&irnfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetId", false /* optional */, - reinterpret_cast(&irsifn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetReleaseCallback", - false /* optional */, reinterpret_cast(&irsrcfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestAddInput", false /* optional */, - reinterpret_cast(&iraifn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestAddRequestedOutput", - false /* optional */, reinterpret_cast(&irarofn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestAppendInputData", - false /* optional */, reinterpret_cast(&iraidfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetResponseCallback", - false /* optional */, reinterpret_cast(&irsrescfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerInferAsync", false /* optional */, - reinterpret_cast(&iafn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceResponseError", false /* optional */, - reinterpret_cast(&irefn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceResponseDelete", false /* optional */, - reinterpret_cast(&irdfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ResponseAllocatorDelete", false /* optional */, - reinterpret_cast(&radfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ErrorNew", false /* optional */, - reinterpret_cast(&enfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_MemoryTypeString", false /* optional */, - reinterpret_cast(&mtsfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceResponseOutputCount", - false /* 
optional */, reinterpret_cast(&irocfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_DataTypeString", false /* optional */, - reinterpret_cast(&dtsfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ErrorDelete", false /* optional */, - reinterpret_cast(&edfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ErrorCodeString", false /* optional */, - reinterpret_cast(&ectsfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ErrorMessage", false /* optional */, - reinterpret_cast(&emfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerModelConfig", false /* optional */, - reinterpret_cast(&mcfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetCorrelationId", - false /* optional */, reinterpret_cast(&scidfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetCorrelationIdString", - false /* optional */, reinterpret_cast(&sscidfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetFlags", false /* optional */, - reinterpret_cast(&sffn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetPriorityUInt64", - false /* optional */, reinterpret_cast(&spfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestSetTimeoutMicroseconds", - false /* optional */, reinterpret_cast(&stmsfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_StringToDataType", false /* optional */, - reinterpret_cast(&stdtfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceResponseOutput", false /* optional */, - reinterpret_cast(&irofn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestId", false /* optional */, - reinterpret_cast(&ridfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_InferenceRequestDelete", false /* optional */, - reinterpret_cast(&rdfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerModelStatistics", false /* optional */, - reinterpret_cast(&msfn))); - - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerUnloadModel", false /* optional */, - reinterpret_cast(&umfn))); - RETURN_IF_ERROR(GetEntrypoint( - dlhandle_, "TRITONSERVER_ServerOptionsSetLogInfo", false /* optional */, - reinterpret_cast(&slifn))); - - - api_version_fn_ = apifn; - options_new_fn_ = onfn; - options_set_model_repo_path_fn_ = rpfn; - set_log_verbose_fn_ = slvfn; - - set_backend_directory_fn_ = sbdfn; - set_repo_agent_directory_fn_ = srdfn; - set_strict_model_config_fn_ = ssmcfn; - set_min_supported_compute_capability_fn_ = smsccfn; - - server_new_fn_ = snfn; - server_options_delete_fn_ = odfn; - server_delete_fn_ = sdfn; - server_is_live_fn_ = ilfn; - - server_is_ready_fn_ = irfn; - server_metadata_fn_ = smfn; - message_serialize_to_json_fn_ = stjfn; - message_delete_fn_ = mdfn; - - model_is_ready_fn_ = mirfn; - model_metadata_fn_ = mmfn; - response_allocator_new_fn_ = ranfn; - inference_request_new_fn_ = irnfn; - - inference_request_set_id_fn_ = irsifn; - inference_request_set_release_callback_fn_ = irsrcfn; - inference_request_add_input_fn_ = iraifn; - inference_request_add_requested_output_fn_ = irarofn; - - inference_request_append_input_data_fn_ = iraidfn; - inference_request_set_response_callback_fn_ = irsrescfn; - infer_async_fn_ = iafn; - inference_response_error_fn_ = irefn; - - inference_response_delete_fn_ = irdfn; - response_allocator_delete_fn_ = radfn; - error_new_fn_ = enfn; 
- - memory_type_string_fn_ = mtsfn; - inference_response_output_count_fn_ = irocfn; - data_type_string_fn_ = dtsfn; - - error_delete_fn_ = edfn; - error_code_to_string_fn_ = ectsfn; - error_message_fn_ = emfn; - model_config_fn_ = mcfn; - set_correlation_id_fn_ = scidfn; - set_string_correlation_id_fn_ = sscidfn; - - set_flags_fn_ = sffn; - set_priority_fn_ = spfn; - set_timeout_ms_fn_ = stmsfn; - string_to_datatype_fn_ = stdtfn; - - inference_response_output_fn_ = irofn; - request_id_fn_ = ridfn; - request_delete_fn_ = rdfn; - model_statistics_fn_ = msfn; - - unload_model_fn_ = umfn; - set_log_info_fn_ = slifn; - set_cuda_memory_pool_byte_size_ = scmpbsfn; - - return Error::Success; -} - -void -TritonLoader::ClearHandles() -{ - dlhandle_ = nullptr; - - api_version_fn_ = nullptr; - options_new_fn_ = nullptr; - options_set_model_repo_path_fn_ = nullptr; - set_log_verbose_fn_ = nullptr; - - set_backend_directory_fn_ = nullptr; - set_repo_agent_directory_fn_ = nullptr; - set_strict_model_config_fn_ = nullptr; - set_min_supported_compute_capability_fn_ = nullptr; - - server_new_fn_ = nullptr; - server_options_delete_fn_ = nullptr; - server_delete_fn_ = nullptr; - server_is_live_fn_ = nullptr; - - server_is_ready_fn_ = nullptr; - server_metadata_fn_ = nullptr; - message_serialize_to_json_fn_ = nullptr; - message_delete_fn_ = nullptr; - - model_is_ready_fn_ = nullptr; - model_metadata_fn_ = nullptr; - response_allocator_new_fn_ = nullptr; - inference_request_new_fn_ = nullptr; - - inference_request_set_id_fn_ = nullptr; - inference_request_set_release_callback_fn_ = nullptr; - inference_request_add_input_fn_ = nullptr; - inference_request_add_requested_output_fn_ = nullptr; - - inference_request_append_input_data_fn_ = nullptr; - inference_request_set_response_callback_fn_ = nullptr; - infer_async_fn_ = nullptr; - inference_response_error_fn_ = nullptr; - - inference_response_delete_fn_ = nullptr; - response_allocator_delete_fn_ = nullptr; - error_new_fn_ = nullptr; - - memory_type_string_fn_ = nullptr; - inference_response_output_count_fn_ = nullptr; - data_type_string_fn_ = nullptr; - error_message_fn_ = nullptr; - - error_delete_fn_ = nullptr; - error_code_to_string_fn_ = nullptr; - model_config_fn_ = nullptr; - set_correlation_id_fn_ = nullptr; - set_string_correlation_id_fn_ = nullptr; - - set_flags_fn_ = nullptr; - set_priority_fn_ = nullptr; - set_timeout_ms_fn_ = nullptr; - string_to_datatype_fn_ = nullptr; - - inference_response_output_fn_ = nullptr; - request_id_fn_ = nullptr; - request_delete_fn_ = nullptr; - model_statistics_fn_ = nullptr; - unload_model_fn_ = nullptr; - set_log_info_fn_ = nullptr; -} - -Error -TritonLoader::FileExists(std::string& filepath) -{ - std::ifstream ifile; - ifile.open(filepath); - if (!ifile) { - return Error("unable to find local Triton library: " + filepath); - } else { - return Error::Success; - } -} - -Error -TritonLoader::Infer( - const tc::InferOptions& options, const std::vector& inputs, - const std::vector& outputs, - InferResult** result) -{ - Error error = Error::Success; - if (!ServerIsReady() || !ModelIsLoaded()) { - return Error("Server is not ready and/or requested model is not loaded"); - } - - TRITONSERVER_ResponseAllocator* allocator = nullptr; - TRITONSERVER_InferenceRequest* irequest = nullptr; - TRITONSERVER_InferenceResponse* completed_response = nullptr; - tc::RequestTimers timer; - timer.Reset(); - timer.CaptureTimestamp(tc::RequestTimers::Kind::REQUEST_START); - - RETURN_IF_ERROR(InitializeRequest(options, outputs, &allocator, 
&irequest));
-  ScopedDefer error_handler([&error, &completed_response, &allocator, this] {
-    error = CleanUp(completed_response, allocator);
-  });
-  RETURN_IF_ERROR(AddInputs(inputs, irequest));
-  RETURN_IF_ERROR(AddOutputs(outputs, irequest));
-
-  AllocPayload alloc_payload;
-  for (auto& output : outputs) {
-    if (output->IsSharedMemory()) {
-      std::string shm_name;
-      size_t shm_byte_size;
-      size_t offset;
-      // TODO: Error handling
-      output->SharedMemoryInfo(&shm_name, &shm_byte_size, &offset);
-
-      void* buf;
-      TRITONSERVER_MemoryType memory_type;
-      int64_t memory_type_id;
-      RETURN_IF_ERROR(shm_manager_->GetMemoryInfo(
-          shm_name, offset, shm_byte_size, &buf, &memory_type,
-          &memory_type_id));
-
-      alloc_payload.output_map_.emplace(
-          std::piecewise_construct, std::forward_as_tuple(output->Name()),
-          std::forward_as_tuple(new AllocPayload::OutputInfo(
-              buf, shm_byte_size, memory_type, memory_type_id)));
-    }
-  }
-
-  const char* cid = nullptr;
-  RETURN_IF_TRITONSERVER_ERROR(
-      request_id_fn_(irequest, &cid), "Failed to get request id");
-  std::string id = cid;
-
-  // Perform inference...
-  timer.CaptureTimestamp(tc::RequestTimers::Kind::SEND_START);
-  auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
-  std::future<TRITONSERVER_InferenceResponse*> completed = p->get_future();
-  RETURN_IF_TRITONSERVER_ERROR(
-      inference_request_set_response_callback_fn_(
-          irequest, allocator, &alloc_payload /* response_allocator_userp */,
-          InferResponseComplete, reinterpret_cast<void*>(p)),
-      "setting response callback");
-  RETURN_IF_TRITONSERVER_ERROR(
-      infer_async_fn_((server_).get(), irequest, nullptr /* trace */),
-      "running inference");
-  timer.CaptureTimestamp(tc::RequestTimers::Kind::SEND_END);
-
-  // Wait for the inference to complete.
-  completed_response = completed.get();
-
-  RETURN_IF_TRITONSERVER_ERROR(
-      inference_response_error_fn_(completed_response),
-      "inference response error");
-
-  timer.CaptureTimestamp(tc::RequestTimers::Kind::RECV_START);
-  timer.CaptureTimestamp(tc::RequestTimers::Kind::RECV_END);
-  timer.CaptureTimestamp(tc::RequestTimers::Kind::REQUEST_END);
-
-  tc::Error err = UpdateInferStat(timer);
-  if (!err.IsOk()) {
-    std::cerr << "Failed to update context stat: " << err << std::endl;
-  }
-
-  InferResult::Create(result, err, id);
-
-  // CleanUp the response allocators
-  error_handler.Complete();
-
-  return error;
-}
-
-Error
-TritonLoader::CleanUp(
-    TRITONSERVER_InferenceResponse* completed_response,
-    TRITONSERVER_ResponseAllocator* allocator)
-{
-  TRITONSERVER_Error* response_err = nullptr;
-  if (completed_response != nullptr) {
-    response_err = inference_response_delete_fn_(completed_response);
-  }
-  TRITONSERVER_Error* allocator_err = response_allocator_delete_fn_(allocator);
-  RETURN_IF_TRITONSERVER_ERROR(response_err, "deleting inference response");
-  RETURN_IF_TRITONSERVER_ERROR(allocator_err, "deleting response allocator");
-  return Error::Success;
-}
-
-Error
-TritonLoader::InitializeRequest(
-    const tc::InferOptions& options,
-    const std::vector<const InferRequestedOutput*>& outputs,
-    TRITONSERVER_ResponseAllocator** allocator,
-    TRITONSERVER_InferenceRequest** irequest)
-{
-  // Create the allocator that will be used to allocate buffers for
-  // the result tensors.
- RETURN_IF_TRITONSERVER_ERROR( - GetSingleton()->response_allocator_new_fn_( - allocator, - reinterpret_cast< - TRITONSERVER_Error* (*)(TRITONSERVER_ResponseAllocator* allocator, - const char* tensor_name, size_t byte_size, - TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, void* userp, - void** buffer, void** buffer_userp, - TRITONSERVER_MemoryType* - actual_memory_type, - int64_t* actual_memory_type_id)>( - ResponseAlloc), - reinterpret_cast< - TRITONSERVER_Error* (*)(TRITONSERVER_ResponseAllocator* allocator, - void* buffer, void* buffer_userp, - size_t byte_size, - TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id)>(ResponseRelease), - nullptr /* start_fn */), - "creating response allocator"); - - // set up inference request - RETURN_IF_TRITONSERVER_ERROR( - inference_request_new_fn_( - irequest, (server_).get(), model_name_.c_str(), model_version_), - "creating inference request"); - RETURN_IF_TRITONSERVER_ERROR( - inference_request_set_id_fn_(*irequest, options.request_id_.c_str()), - "setting ID for the request"); - if ((options.sequence_id_ != 0) || (options.sequence_id_str_ != "") || - (options.priority_ != 0) || (options.server_timeout_ != 0) || - outputs.empty()) { - if (options.sequence_id_ != 0) { - RETURN_IF_TRITONSERVER_ERROR( - set_correlation_id_fn_(*irequest, options.sequence_id_), - "setting sequence ID for the request"); - } else if (options.sequence_id_str_ != "") { - RETURN_IF_TRITONSERVER_ERROR( - set_string_correlation_id_fn_( - *irequest, options.sequence_id_str_.c_str()), - "setting sequence ID for the request"); - } - uint32_t flags = 0; - if (options.sequence_start_) { - flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_START; - } - if (options.sequence_end_) { - flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_END; - } - RETURN_IF_TRITONSERVER_ERROR( - set_flags_fn_(*irequest, flags), - "setting inference flags for the request"); - } - if (options.priority_ != 0) { - RETURN_IF_TRITONSERVER_ERROR( - set_priority_fn_(*irequest, options.priority_), - "setting priority for the request"); - } - if (options.server_timeout_ != 0) { - RETURN_IF_TRITONSERVER_ERROR( - set_timeout_ms_fn_(*irequest, options.server_timeout_), - "setting timeout for the request"); - } - RETURN_IF_TRITONSERVER_ERROR( - inference_request_set_release_callback_fn_( - *irequest, InferRequestComplete, nullptr /* request_release_userp */), - "setting request release callback"); - return Error::Success; -} - -Error -TritonLoader::AddInputs( - const std::vector& inputs, - TRITONSERVER_InferenceRequest* irequest) -{ - for (auto io : inputs) { - const char* input_name = io->Name().c_str(); - const char* datatype = io->Datatype().c_str(); - const TRITONSERVER_DataType dtype = string_to_datatype_fn_(datatype); - std::vector shape_vec; - for (const int64_t dim : io->Shape()) { // this is a vector, just use it - shape_vec.push_back(dim); - } - - RETURN_IF_TRITONSERVER_ERROR( - inference_request_add_input_fn_( - irequest, input_name, dtype, &shape_vec[0], shape_vec.size()), - "setting input for the request"); - size_t byte_size; - tc::Error err = io->ByteSize(&byte_size); - if (!err.IsOk()) { - return Error(err.Message()); - } - if (byte_size == 0) { - RETURN_IF_TRITONSERVER_ERROR( - inference_request_append_input_data_fn_( - irequest, input_name, nullptr, 0 /* byte_size */, - TRITONSERVER_MEMORY_CPU /* memory type */, - 0 /* memory_type_id */), - "appending input data with byte size zero"); - } else { - if (!io->IsSharedMemory()) { - io->PrepareForRequest(); - bool end_of_input = false; - 
while (!end_of_input) {
-          const uint8_t* buf;
-          size_t buf_size;
-          io->GetNext(&buf, &buf_size, &end_of_input);
-          if (buf != nullptr) {
-            RETURN_IF_TRITONSERVER_ERROR(
-                inference_request_append_input_data_fn_(
-                    irequest, input_name, const_cast<uint8_t*>(buf), buf_size,
-                    TRITONSERVER_MEMORY_CPU /* memory_type */,
-                    0 /* memory_type_id */),
-                "appending data to tritonserver");
-          }
-        }
-      } else {
-        std::string shm_name;
-        size_t shm_byte_size;
-        size_t offset;
-        // TODO: Error handling
-        io->SharedMemoryInfo(&shm_name, &shm_byte_size, &offset);
-        void* buf;
-        TRITONSERVER_MemoryType memory_type;
-        int64_t memory_type_id;
-        RETURN_IF_ERROR(shm_manager_->GetMemoryInfo(
-            shm_name, offset, shm_byte_size, &buf, &memory_type,
-            &memory_type_id));
-        RETURN_IF_TRITONSERVER_ERROR(
-            inference_request_append_input_data_fn_(
-                irequest, input_name, buf, byte_size,
-                memory_type /* memory_type */,
-                memory_type_id /* memory_type_id */),
-            "appending data to tritonserver");
-      }
-    }
-  }
-
-
-  return Error::Success;
-}
-
-Error
-TritonLoader::AddOutputs(
-    const std::vector<const InferRequestedOutput*>& outputs,
-    TRITONSERVER_InferenceRequest* irequest)
-{
-  for (auto io : outputs) {
-    const char* output_name = io->Name().c_str();
-    RETURN_IF_TRITONSERVER_ERROR(
-        inference_request_add_requested_output_fn_(irequest, output_name),
-        "setting output for the request");
-  }
-  return Error::Success;
-}
-
-
-Error
-TritonLoader::ModelInferenceStatistics(
-    const std::string& model_name, const std::string& model_version,
-    rapidjson::Document* infer_stat)
-{
-  if (ServerIsReady() && ModelIsLoaded()) {
-    TRITONSERVER_Message* model_stats_message = nullptr;
-    int64_t requested_model_version;
-    auto err =
-        GetModelVersionFromString(model_version, &requested_model_version);
-    if (err.IsOk()) {
-      RETURN_IF_TRITONSERVER_ERROR(
-          model_statistics_fn_(
-              (server_).get(), model_name.c_str(), requested_model_version,
-              &model_stats_message),
-          "getting model statistics from server");
-
-      const char* buffer;
-      size_t byte_size;
-      RETURN_IF_TRITONSERVER_ERROR(
-          message_serialize_to_json_fn_(
-              model_stats_message, &buffer, &byte_size),
-          "serializing message to json");
-
-      infer_stat->Parse(buffer, byte_size);
-      if (infer_stat->HasParseError()) {
-        return Error(
-            "error: failed to parse server metadata from JSON: " +
-            std::string(GetParseError_En(infer_stat->GetParseError())) +
-            " at " + std::to_string(infer_stat->GetErrorOffset()));
-      }
-      RETURN_IF_TRITONSERVER_ERROR(
-          message_delete_fn_(model_stats_message),
-          "deleting inference statistics message");
-    }
-    return err;
-  } else {
-    return Error(
-        "Trying to get model statistics while server is not started or model "
-        "is not ready");
-  }
-}
-
-TritonLoader*
-TritonLoader::GetSingleton()
-{
-  static TritonLoader loader;
-  return &loader;
-}
-
-TritonLoader::~TritonLoader()
-{
-  FAIL_IF_ERR(Delete(), "dereferencing server instance...");
-  FAIL_IF_ERR(CloseLibraryHandle(dlhandle_), "error on closing triton loader");
-  ClearHandles();
-}
-
-#ifdef TRITON_ENABLE_GPU
-Error
-TritonLoader::RegisterCudaMemory(
-    const std::string& name, void* handle, const size_t byte_size)
-{
-  RETURN_IF_ERROR(shm_manager_->RegisterCUDAMemory(
-      name, handle, byte_size, 0 /* device id */));
-  return Error::Success;
-}
-#endif  // TRITON_ENABLE_GPU
-
-Error
-TritonLoader::RegisterSystemMemory(
-    const std::string& name, void* ptr, const size_t byte_size)
-{
-  RETURN_IF_ERROR(shm_manager_->RegisterSystemMemory(name, ptr, byte_size));
-  return Error::Success;
-}
-
-Error
-TritonLoader::UnregisterAllSharedMemory()
-{
-  RETURN_IF_ERROR(shm_manager_->UnregisterAll(TRITONSERVER_MEMORY_CPU));
-  RETURN_IF_ERROR(shm_manager_->UnregisterAll(TRITONSERVER_MEMORY_GPU));
-  return Error::Success;
-}
-
-TRITONSERVER_Error*
-TritonLoader::ErrorNew(TRITONSERVER_Error_Code code, const char* message)
-{
-  return error_new_fn_(code, message);
-}
-
-}}}}  // namespace triton::perfanalyzer::clientbackend::tritoncapi
diff --git a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.h b/src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.h
deleted file mode 100644
index 1a18176c8..000000000
--- a/src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.h
+++ /dev/null
@@ -1,519 +0,0 @@
-// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#pragma once
-
-#include
-#include
-
-#include
-#include
-#include
-#include
-
-#include "../client_backend.h"
-#include "common.h"
-#include "shared_library.h"
-#include "shared_memory_manager.h"
-#include "triton/core/tritonserver.h"
-
-// If TRITONSERVER error is non-OK, return the corresponding status.
-#define RETURN_IF_TRITONSERVER_ERROR(E, MSG) \ - do { \ - TRITONSERVER_Error* err__ = (E); \ - if (err__ != nullptr) { \ - std::cout << "error: " << (MSG) << ": " \ - << GetSingleton()->error_code_to_string_fn_(err__) << " - " \ - << GetSingleton()->error_message_fn_(err__) << std::endl; \ - Error newErr = Error(MSG); \ - GetSingleton()->error_delete_fn_(err__); \ - return newErr; \ - } \ - } while (false) - -#define FAIL_IF_TRITONSERVER_ERROR(E, MSG) \ - do { \ - TRITONSERVER_Error* err__ = (E); \ - if (err__ != nullptr) { \ - std::cerr << "error: " << (MSG) << ": " \ - << GetSingleton()->error_code_to_string_fn_(err__) << " - " \ - << GetSingleton()->error_message_fn_(err__) << std::endl; \ - Error newErr = Error(MSG); \ - GetSingleton()->error_delete_fn_(err__); \ - exit(1); \ - } \ - } while (false) - -#define REPORT_TRITONSERVER_ERROR(E) \ - do { \ - TRITONSERVER_Error* err__ = (E); \ - if (err__ != nullptr) { \ - std::cout << GetSingleton()->error_message_fn_(err__) << std::endl; \ - GetSingleton()->error_delete_fn_(err__); \ - } \ - } while (false) - -namespace tc = triton::client; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace tritoncapi { - -class InferResult; - -class TritonLoader : public tc::InferenceServerClient { - public: - ~TritonLoader(); - - static Error Create( - const std::string& triton_server_path, - const std::string& model_repository_path, bool verbose); - - Error Delete(); - Error StartTriton(); - - Error LoadModel( - const std::string& model_name, const std::string& model_version); - - Error ModelMetadata(rapidjson::Document* model_metadata); - - Error ModelConfig( - rapidjson::Document* model_config, const std::string& model_name, - const std::string& model_version); - - Error ServerMetaData(rapidjson::Document* server_metadata); - - Error Infer( - const tc::InferOptions& options, - const std::vector& inputs, - const std::vector& outputs, - InferResult** result); - - Error CleanUp( - TRITONSERVER_InferenceResponse* completed_response, - TRITONSERVER_ResponseAllocator* allocator); - - Error ModelInferenceStatistics( - const std::string& model_name, const std::string& model_version, - rapidjson::Document* infer_stat); - - Error ClientInferStat(tc::InferStat* infer_stat) - { - *infer_stat = infer_stat_; - return Error::Success; - } - -#ifdef TRITON_ENABLE_GPU - Error RegisterCudaMemory( - const std::string& name, void* handle, const size_t byte_size); -#endif // TRITON_ENABLE_GPU - - Error RegisterSystemMemory( - const std::string& name, void* ptr, const size_t byte_size); - - Error UnregisterAllSharedMemory(); - - TRITONSERVER_Error* ErrorNew( - TRITONSERVER_Error_Code code, const char* message); - - bool ModelIsLoaded() { return model_is_loaded_; } - bool ServerIsReady() { return server_is_ready_; } - - TRITONSERVER_Error* DeleteInferRequest( - TRITONSERVER_InferenceRequest* irequest) - { - return request_delete_fn_(irequest); - } - static TritonLoader* GetSingleton(); - - // TRITONSERVER_ApiVersion - typedef TRITONSERVER_Error* (*TritonServerApiVersionFn_t)( - uint32_t* major, uint32_t* minor); - // TRITONSERVER_ServerOptionsNew - typedef TRITONSERVER_Error* (*TritonServerOptionsNewFn_t)( - TRITONSERVER_ServerOptions** options); - // TRITONSERVER_ServerOptionsSetModelRepositoryPath - typedef TRITONSERVER_Error* (*TritonServerOptionSetModelRepoPathFn_t)( - TRITONSERVER_ServerOptions* options, const char* model_repository_path); - // TRITONSERVER_ServerOptionsSetLogVerbose - typedef TRITONSERVER_Error* 
(*TritonServerSetLogVerboseFn_t)( - TRITONSERVER_ServerOptions* options, int level); - - // TRITONSERVER_ServerOptionsSetBackendDirectory - typedef TRITONSERVER_Error* (*TritonServerSetBackendDirFn_t)( - TRITONSERVER_ServerOptions* options, const char* backend_dir); - - // TRITONSERVER_ServerOptionsSetRepoAgentDirectory - typedef TRITONSERVER_Error* (*TritonServerSetRepoAgentDirFn_t)( - TRITONSERVER_ServerOptions* options, const char* repoagent_dir); - - // TRITONSERVER_ServerOptionsSetStrictModelConfig - typedef TRITONSERVER_Error* (*TritonServerSetStrictModelConfigFn_t)( - TRITONSERVER_ServerOptions* options, bool strict); - - // TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability - typedef TRITONSERVER_Error* ( - *TritonServerSetMinSupportedComputeCapabilityFn_t)( - TRITONSERVER_ServerOptions* options, double cc); - - // TRITONSERVER_ServerNew - typedef TRITONSERVER_Error* (*TritonServerNewFn_t)( - TRITONSERVER_Server** server, TRITONSERVER_ServerOptions* option); - - // TRITONSERVER_ServerOptionsDelete - typedef TRITONSERVER_Error* (*TritonServerOptionsDeleteFn_t)( - TRITONSERVER_ServerOptions* options); - - // TRITONSERVER_ServerDelete - typedef TRITONSERVER_Error* (*TritonServerDeleteFn_t)( - TRITONSERVER_Server* server); - - // TRITONSERVER_ServerIsLive - typedef TRITONSERVER_Error* (*TritonServerIsLiveFn_t)( - TRITONSERVER_Server* server, bool* live); - - // TRITONSERVER_ServerIsReady - typedef TRITONSERVER_Error* (*TritonServerIsReadyFn_t)( - TRITONSERVER_Server* server, bool* ready); - - // TRITONSERVER_ServerMetadata - typedef TRITONSERVER_Error* (*TritonServerMetadataFn_t)( - TRITONSERVER_Server* server, TRITONSERVER_Message** server_metadata); - - // TRITONSERVER_MessageSerializeToJson - typedef TRITONSERVER_Error* (*TritonServerMessageSerializeToJsonFn_t)( - TRITONSERVER_Message* message, const char** base, size_t* byte_size); - - // TRITONSERVER_MessageDelete - typedef TRITONSERVER_Error* (*TritonServerMessageDeleteFn_t)( - TRITONSERVER_Message* message); - - // TRITONSERVER_ServerModelIsReady - typedef TRITONSERVER_Error* (*TritonServerModelIsReadyFn_t)( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, bool* ready); - - // TRITONSERVER_ServerModelMetadata - typedef TRITONSERVER_Error* (*TritonServerModelMetadataFn_t)( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, TRITONSERVER_Message** model_metadata); - - // TRITONSERVER_ResponseAllocatorNew - typedef TRITONSERVER_Error* (*TritonServerResponseAllocatorNewFn_t)( - TRITONSERVER_ResponseAllocator** allocator, - TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, - TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, - TRITONSERVER_ResponseAllocatorStartFn_t start_fn); - - // TRITONSERVER_InferenceRequestNew - typedef TRITONSERVER_Error* (*TritonServerInferenceRequestNewFn_t)( - TRITONSERVER_InferenceRequest** inference_request, - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version); - - // TRITONSERVER_InferenceRequestSetId - typedef TRITONSERVER_Error* (*TritonServerInferenceRequestSetIdFn_t)( - TRITONSERVER_InferenceRequest* inference_request, const char* id); - - // TRITONSERVER_InferenceRequestSetReleaseCallback - typedef TRITONSERVER_Error* ( - *TritonServerInferenceRequestSetReleaseCallbackFn_t)( - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, - void* request_release_userp); - - // TRITONSERVER_InferenceRequestAddInput - typedef 
TRITONSERVER_Error* (*TritonServerInferenceRequestAddInputFn_t)( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const TRITONSERVER_DataType datatype, const int64_t* shape, - uint64_t dim_count); - - // TRITONSERVER_InferenceRequestAddRequestedOutput - typedef TRITONSERVER_Error* ( - *TritonServerInferenceRequestAddRequestedOutputFn_t)( - TRITONSERVER_InferenceRequest* inference_request, const char* name); - - // TRITONSERVER_InferenceRequestAppendInputData - typedef TRITONSERVER_Error* ( - *TritonServerInferenceRequestAppendInputDataFn_t)( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_i); - - // TRITONSERVER_InferenceRequestSetResponseCallback - typedef TRITONSERVER_Error* ( - *TritonServerInferenceRequestSetResponseCallbackFn_t)( - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_ResponseAllocator* response_allocator, - void* response_allocator_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp); - - // TRITONSERVER_ServerInferAsync - typedef TRITONSERVER_Error* (*TritonServerInferAsyncFn_t)( - TRITONSERVER_Server* server, - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_InferenceTrace* trace); - - // TRITONSERVER_InferenceResponseError - typedef TRITONSERVER_Error* (*TritonServerInferenceResponseErrorFn_t)( - TRITONSERVER_InferenceResponse* inference_response); - - // TRITONSERVER_InferenceResponseDelete - typedef TRITONSERVER_Error* (*TritonServerInferenceResponseDeleteFn_t)( - TRITONSERVER_InferenceResponse* inference_response); - - // TRITONSERVER_InferenceRequestRemoveAllInputData - typedef TRITONSERVER_Error* ( - *TritonServerInferenceRequestRemoveAllInputDataFn_t)( - TRITONSERVER_InferenceRequest* inference_request, const char* name); - - // TRITONSERVER_ResponseAllocatorDelete - typedef TRITONSERVER_Error* (*TritonServerResponseAllocatorDeleteFn_t)( - TRITONSERVER_ResponseAllocator* allocator); - - // TRITONSERVER_ErrorNew - typedef TRITONSERVER_Error* (*TritonServerErrorNewFn_t)( - TRITONSERVER_Error_Code code, const char* msg); - - // TRITONSERVER_MemoryTypeString - typedef const char* (*TritonServerMemoryTypeStringFn_t)( - TRITONSERVER_MemoryType memtype); - - // TRITONSERVER_InferenceResponseOutputCount - typedef TRITONSERVER_Error* (*TritonServerInferenceResponseOutputCountFn_t)( - TRITONSERVER_InferenceResponse* inference_response, uint32_t* count); - - // TRITONSERVER_DataTypeString - typedef const char* (*TritonServerDataTypeStringFn_t)( - TRITONSERVER_DataType datatype); - - // TRITONSERVER_ErrorMessage - typedef const char* (*TritonServerErrorMessageFn_t)( - TRITONSERVER_Error* error); - - // TRITONSERVER_ErrorDelete - typedef void (*TritonServerErrorDeleteFn_t)(TRITONSERVER_Error* error); - - // TRITONSERVER_ErrorCodeString - typedef const char* (*TritonServerErrorCodeToStringFn_t)( - TRITONSERVER_Error* error); - - // TRITONSERVER_ServerModelConfig - typedef TRITONSERVER_Error* (*TritonServerModelConfigFn_t)( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, const uint32_t config_version, - TRITONSERVER_Message** model_config); - - // TRITONSERVER_InferenceRequestSetCorrelationId - typedef TRITONSERVER_Error* ( - *TritonServerInferenceRequestSetCorrelationIdFn_t)( - TRITONSERVER_InferenceRequest* inference_request, - uint64_t correlation_id); - - // TRITONSERVER_InferenceRequestSetCorrelationId - typedef 
TRITONSERVER_Error* ( - *TritonServerInferenceRequestSetStringCorrelationIdFn_t)( - TRITONSERVER_InferenceRequest* inference_request, - const char* correlation_id); - - // TRITONSERVER_InferenceRequestSetFlags - typedef TRITONSERVER_Error* (*TritonServerInferenceRequestSetFlagsFn_t)( - TRITONSERVER_InferenceRequest* inference_request, uint32_t flags); - - // TRITONSERVER_InferenceRequestSetPriorityUInt64 - typedef TRITONSERVER_Error* (*TritonServerInferenceRequestSetPriorityFn_t)( - TRITONSERVER_InferenceRequest* inference_request, uint64_t priority); - - // TRITONSERVER_InferenceRequestSetTimeoutMicroseconds - typedef TRITONSERVER_Error* ( - *TritonServerInferenceRequestSetTimeoutMicrosecondsFn_t)( - TRITONSERVER_InferenceRequest* inference_request, uint64_t timeout_us); - - // TRITONSERVER_StringToDataType - typedef TRITONSERVER_DataType (*TritonServerStringToDatatypeFn_t)( - const char* dtype); - - // TRITONSERVER_InferenceResponseOutput - typedef TRITONSERVER_Error* (*TritonServerInferenceResponseOutputFn_t)( - TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, - const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape, - uint64_t* dim_count, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, - void** userp); - - // TRITONSERVER_InferenceRequestId - typedef TRITONSERVER_Error* (*TritonServerRequestIdFn_t)( - TRITONSERVER_InferenceRequest* inference_request, const char** id); - - // TRITONSERVER_InferenceRequestDelete - typedef TRITONSERVER_Error* (*TritonServerRequestDeleteFn_t)( - TRITONSERVER_InferenceRequest* inference_request); - - // TRITONSERVER_ServerModelStatistics - typedef TRITONSERVER_Error* (*TritonServerModelStatisticsFn_t)( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, TRITONSERVER_Message** model_stats); - - // TRITONSERVER_ServerUnloadModel - typedef TRITONSERVER_Error* (*TritonSeverUnloadModelFn_t)( - TRITONSERVER_Server* server, const char* model_name); - - // TRITONSERVER_ServerOptionsSetLogInfo - typedef TRITONSERVER_Error* (*TritonSeverSetLogInfoFn_t)( - TRITONSERVER_ServerOptions* options, bool log); - - // TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize - typedef TRITONSERVER_Error* (*TritonServerSetCudaMemoryPoolByteSizeFn_t)( - TRITONSERVER_ServerOptions* options, int gpu_device, uint64_t size); - - private: - TritonLoader() - : InferenceServerClient( - false /* verbose flag that is set later during ::Create*/) - { - verbose_level_ = 0; - enforce_memory_type_ = false; - requested_memory_type_ = TRITONSERVER_MEMORY_CPU; - model_is_loaded_ = false; - server_is_ready_ = false; - shm_manager_ = std::make_unique(); - } - - Error PopulateInternals( - const std::string& triton_server_path, - const std::string& model_repository_path, bool verbose); - - /// Load all tritonserver.h functions onto triton_loader - /// internal handles - Error LoadServerLibrary(); - - void ClearHandles(); - - /// Check if file exists in the current directory - /// \param filepath Path of library to check - /// \return perfanalyzer::clientbackend::Error - Error FileExists(std::string& filepath); - - Error InitializeRequest( - const tc::InferOptions& options, - const std::vector& outputs, - TRITONSERVER_ResponseAllocator** allocator, - TRITONSERVER_InferenceRequest** irequest); - - Error AddInputs( - const std::vector& inputs, - TRITONSERVER_InferenceRequest* irequest); - - Error AddOutputs( - const std::vector& outputs, - TRITONSERVER_InferenceRequest* 
irequest); - - void* dlhandle_; - TritonServerApiVersionFn_t api_version_fn_; - TritonServerOptionsNewFn_t options_new_fn_; - TritonServerOptionSetModelRepoPathFn_t options_set_model_repo_path_fn_; - TritonServerSetLogVerboseFn_t set_log_verbose_fn_; - - TritonServerSetBackendDirFn_t set_backend_directory_fn_; - TritonServerSetRepoAgentDirFn_t set_repo_agent_directory_fn_; - TritonServerSetStrictModelConfigFn_t set_strict_model_config_fn_; - TritonServerSetMinSupportedComputeCapabilityFn_t - set_min_supported_compute_capability_fn_; - - TritonServerNewFn_t server_new_fn_; - TritonServerOptionsDeleteFn_t server_options_delete_fn_; - TritonServerDeleteFn_t server_delete_fn_; - TritonServerIsLiveFn_t server_is_live_fn_; - - TritonServerIsReadyFn_t server_is_ready_fn_; - TritonServerMetadataFn_t server_metadata_fn_; - TritonServerMessageSerializeToJsonFn_t message_serialize_to_json_fn_; - TritonServerMessageDeleteFn_t message_delete_fn_; - - TritonServerModelIsReadyFn_t model_is_ready_fn_; - TritonServerModelMetadataFn_t model_metadata_fn_; - TritonServerResponseAllocatorNewFn_t response_allocator_new_fn_; - TritonServerInferenceRequestNewFn_t inference_request_new_fn_; - - TritonServerInferenceRequestSetIdFn_t inference_request_set_id_fn_; - TritonServerInferenceRequestSetReleaseCallbackFn_t - inference_request_set_release_callback_fn_; - TritonServerInferenceRequestAddInputFn_t inference_request_add_input_fn_; - TritonServerInferenceRequestAddRequestedOutputFn_t - inference_request_add_requested_output_fn_; - - TritonServerInferenceRequestAppendInputDataFn_t - inference_request_append_input_data_fn_; - TritonServerInferenceRequestSetResponseCallbackFn_t - inference_request_set_response_callback_fn_; - TritonServerInferAsyncFn_t infer_async_fn_; - TritonServerInferenceResponseErrorFn_t inference_response_error_fn_; - - TritonServerInferenceResponseDeleteFn_t inference_response_delete_fn_; - TritonServerResponseAllocatorDeleteFn_t response_allocator_delete_fn_; - TritonServerErrorNewFn_t error_new_fn_; - - TritonServerMemoryTypeStringFn_t memory_type_string_fn_; - TritonServerInferenceResponseOutputCountFn_t - inference_response_output_count_fn_; - TritonServerDataTypeStringFn_t data_type_string_fn_; - TritonServerErrorMessageFn_t error_message_fn_; - - TritonServerErrorDeleteFn_t error_delete_fn_; - TritonServerErrorCodeToStringFn_t error_code_to_string_fn_; - TritonServerModelConfigFn_t model_config_fn_; - TritonServerInferenceRequestSetCorrelationIdFn_t set_correlation_id_fn_; - TritonServerInferenceRequestSetStringCorrelationIdFn_t - set_string_correlation_id_fn_; - - TritonServerInferenceRequestSetFlagsFn_t set_flags_fn_; - TritonServerInferenceRequestSetPriorityFn_t set_priority_fn_; - TritonServerInferenceRequestSetTimeoutMicrosecondsFn_t set_timeout_ms_fn_; - TritonServerStringToDatatypeFn_t string_to_datatype_fn_; - - TritonServerInferenceResponseOutputFn_t inference_response_output_fn_; - TritonServerRequestIdFn_t request_id_fn_; - TritonServerRequestDeleteFn_t request_delete_fn_; - TritonServerModelStatisticsFn_t model_statistics_fn_; - - TritonSeverUnloadModelFn_t unload_model_fn_; - TritonSeverSetLogInfoFn_t set_log_info_fn_; - TritonServerSetCudaMemoryPoolByteSizeFn_t set_cuda_memory_pool_byte_size_; - - std::shared_ptr server_{nullptr}; - std::string triton_server_path_{}; - const std::string server_library_path_{"/lib/libtritonserver.so"}; - int verbose_level_{0}; - TRITONSERVER_MemoryType requested_memory_type_{TRITONSERVER_MEMORY_CPU}; - bool enforce_memory_type_{false}; - 
std::string model_repository_path_{""}; - std::string model_name_{""}; - int64_t model_version_{-1}; - bool model_is_loaded_{false}; - bool server_is_ready_{false}; - std::unique_ptr shm_manager_{nullptr}; -}; - -}}}} // namespace triton::perfanalyzer::clientbackend::tritoncapi diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc deleted file mode 100644 index 8003be711..000000000 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ /dev/null @@ -1,2017 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// - -#include "command_line_parser.h" - -#include - -#include -#include -#include -#include - -#include "perf_analyzer_exception.h" - -namespace triton { namespace perfanalyzer { - -PAParamsPtr -CLParser::Parse(int argc, char** argv) -{ - ParseCommandLine(argc, argv); - VerifyOptions(); - - return params_; -} - -std::vector -SplitString(const std::string& str, const std::string& delimiter = ":") -{ - std::vector substrs; - size_t pos = 0; - while (pos != std::string::npos) { - size_t colon_pos = str.find(":", pos); - substrs.push_back(str.substr(pos, colon_pos - pos)); - if (colon_pos == std::string::npos) { - pos = colon_pos; - } else { - pos = colon_pos + 1; - } - } - return substrs; -} - -void -ToLowerCase(std::string& s) -{ - std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { - return std::tolower(c); - }); -} - -// Used to format the usage message -std::string -CLParser::FormatMessage(std::string str, int offset) const -{ - int width = 60; - int current_pos = offset; - while (current_pos + width < int(str.length())) { - int n = str.rfind(' ', current_pos + width); - if (n != int(std::string::npos)) { - str.replace(n, 1, "\n\t "); - current_pos += (width + 10); - } - } - return str; -} - -void -CLParser::Usage(const std::string& msg) -{ - if (!msg.empty()) { - std::cerr << "Error: " << msg << std::endl; - } - - std::cerr << "Usage: " << argv_[0] << " [options]" << std::endl; - std::cerr << "==== SYNOPSIS ====\n \n"; - std::cerr << "\t--version " << std::endl; - std::cerr << "\t-m " << std::endl; - std::cerr << "\t-x " << std::endl; - std::cerr << "\t--bls-composing-models " << std::endl; - std::cerr << "\t--model-signature-name " << std::endl; - std::cerr - << "\t--service-kind " - "<\"triton\"|\"openai\"|\"tfserving\"|\"torchserve\"|\"triton_c_api\">" - << std::endl; - std::cerr << "\t--endpoint " << std::endl; - std::cerr << "\t-v" << std::endl; - std::cerr << std::endl; - std::cerr << "I. MEASUREMENT PARAMETERS: " << std::endl; - std::cerr << "\t--async (-a)" << std::endl; - std::cerr << "\t--sync" << std::endl; - std::cerr << "\t--measurement-interval (-p) " - << std::endl; - std::cerr << "\t--concurrency-range " << std::endl; - std::cerr << "\t--periodic-concurrency-range " << std::endl; - std::cerr << "\t--request-period " << std::endl; - std::cerr << "\t--request-rate-range " << std::endl; - std::cerr << "\t--request-distribution <\"poisson\"|\"constant\">" - << std::endl; - std::cerr << "\t--request-intervals " - << std::endl; - std::cerr << "\t--serial-sequences" << std::endl; - std::cerr << "\t--binary-search" << std::endl; - std::cerr << "\t--num-of-sequences " - << std::endl; - std::cerr << "\t--latency-threshold (-l) " - << std::endl; - std::cerr << "\t--max-threads " << std::endl; - std::cerr << "\t--stability-percentage (-s) " - << std::endl; - std::cerr << "\t--max-trials (-r) " - << std::endl; - std::cerr << "\t--percentile " << std::endl; - std::cerr << "\t--request-count " << std::endl; - std::cerr << "\tDEPRECATED OPTIONS" << std::endl; - std::cerr << "\t-t " << std::endl; - std::cerr << "\t-c " << std::endl; - std::cerr << "\t-d" << std::endl; - std::cerr << std::endl; - std::cerr << "II. 
INPUT DATA OPTIONS: " << std::endl; - std::cerr << "\t-b " << std::endl; - std::cerr << "\t--input-data <\"zero\"|\"random\"|>" << std::endl; - std::cerr << "\t--shared-memory <\"system\"|\"cuda\"|\"none\">" << std::endl; - std::cerr << "\t--output-shared-memory-size " << std::endl; - std::cerr << "\t--shape " << std::endl; - std::cerr << "\t--sequence-length " << std::endl; - std::cerr << "\t--sequence-length-variation " << std::endl; - std::cerr << "\t--sequence-id-range " << std::endl; - std::cerr << "\t--string-length " << std::endl; - std::cerr << "\t--string-data " << std::endl; - std::cerr << "\t--input-tensor-format [binary|json]" << std::endl; - std::cerr << "\t--output-tensor-format [binary|json]" << std::endl; - std::cerr << "\tDEPRECATED OPTIONS" << std::endl; - std::cerr << "\t-z" << std::endl; - std::cerr << "\t--data-directory " << std::endl; - std::cerr << std::endl; - std::cerr << "III. SERVER DETAILS: " << std::endl; - std::cerr << "\t-u " << std::endl; - std::cerr << "\t-i " - << std::endl; - std::cerr << "\t--ssl-grpc-use-ssl " << std::endl; - std::cerr << "\t--ssl-grpc-root-certifications-file " << std::endl; - std::cerr << "\t--ssl-grpc-private-key-file " << std::endl; - std::cerr << "\t--ssl-grpc-certificate-chain-file " << std::endl; - std::cerr << "\t--ssl-https-verify-peer " << std::endl; - std::cerr << "\t--ssl-https-verify-host " << std::endl; - std::cerr << "\t--ssl-https-ca-certificates-file " << std::endl; - std::cerr << "\t--ssl-https-client-certificate-file " << std::endl; - std::cerr << "\t--ssl-https-client-certificate-type " << std::endl; - std::cerr << "\t--ssl-https-private-key-file " << std::endl; - std::cerr << "\t--ssl-https-private-key-type " << std::endl; - std::cerr << std::endl; - std::cerr << "IV. OTHER OPTIONS: " << std::endl; - std::cerr << "\t-f " << std::endl; - std::cerr << "\t--profile-export-file " << std::endl; - std::cerr << "\t-H " << std::endl; - std::cerr << "\t--streaming" << std::endl; - std::cerr << "\t--grpc-compression-algorithm " - << std::endl; - std::cerr << "\t--trace-level" << std::endl; - std::cerr << "\t--trace-rate" << std::endl; - std::cerr << "\t--trace-count" << std::endl; - std::cerr << "\t--log-frequency" << std::endl; - std::cerr << "\t--collect-metrics" << std::endl; - std::cerr << "\t--metrics-url" << std::endl; - std::cerr << "\t--metrics-interval" << std::endl; - std::cerr << std::endl; - std::cerr << "==== OPTIONS ==== \n \n"; - - std::cerr << FormatMessage( - " --version: print the current version of Perf Analyzer.", - 18) - << std::endl; - - std::cerr - << std::setw(9) << std::left << " -m: " - << FormatMessage( - "This is a required argument and is used to specify the model" - " against which to run perf_analyzer.", - 9) - << std::endl; - std::cerr << std::setw(9) << std::left << " -x: " - << FormatMessage( - "The version of the above model to be used. If not specified" - " the most recent version (that is, the highest numbered" - " version) of the model will be used.", - 9) - << std::endl; - std::cerr << FormatMessage( - " --model-signature-name: The signature name of the saved " - "model to use. Default value is \"serving_default\". This " - "option will be ignored if --service-kind is not " - "\"tfserving\".", - 18) - << std::endl; - - std::cerr - << FormatMessage( - " --service-kind: Describes the kind of service perf_analyzer to " - "generate load for. The options are \"triton\", \"openai\", " - "\"triton_c_api\", \"tfserving\" and \"torchserve\". Default " - "value is \"triton\". 
Note in order to use \"openai\" you must " - "specify an endpoint via --endpoint. " - "Note in order to use \"torchserve\" backend --input-data option " - "must point to a json file holding data in the following format " - "{\"data\" : [{\"TORCHSERVE_INPUT\" : [\"\"]}, {...}...]}. The type of file here will depend " - "on the model. In order to use \"triton_c_api\" you must specify " - "the Triton server install path and the model repository path via " - "the --triton-server-directory and --model-repository flags", - 18) - << std::endl; - - std::cerr - << FormatMessage( - " --endpoint: Describes what endpoint to send requests to on the " - "server. This is required when using \"openai\" service-kind, and " - "is ignored for all other cases. Currently only " - "\"v1/chat/completions\" is confirmed to work.", - 18) - << std::endl; - - std::cerr << std::setw(9) << std::left - << " -v: " << FormatMessage("Enables verbose mode.", 9) - << std::endl; - std::cerr << std::setw(9) << std::left - << " -v -v: " << FormatMessage("Enables extra verbose mode.", 9) - << std::endl; - std::cerr << std::endl; - std::cerr << "I. MEASUREMENT PARAMETERS: " << std::endl; - std::cerr - << FormatMessage( - " --async (-a): Enables asynchronous mode in perf_analyzer. " - "By default, perf_analyzer will use synchronous API to " - "request inference. However, if the model is sequential " - "then default mode is asynchronous. Specify --sync to " - "operate sequential models in synchronous mode. In synchronous " - "mode, perf_analyzer will start threads equal to the concurrency " - "level. Use asynchronous mode to limit the number of threads, yet " - "maintain the concurrency.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --sync: Force enables synchronous mode in perf_analyzer. " - "Can be used to operate perf_analyzer with sequential model " - "in synchronous mode.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --measurement-interval (-p): Indicates the time interval used " - "for each measurement in milliseconds. The perf analyzer will " - "sample a time interval specified by -p and take measurement over " - "the requests completed within that time interval. The default " - "value is 5000 msec.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --measurement-mode <\"time_windows\"|\"count_windows\">: " - "Indicates the mode used for stabilizing measurements." - " \"time_windows\" will create windows such that the length " - "of each window is equal to --measurement-interval. " - "\"count_windows\" will create " - "windows such that there are at least " - "--measurement-request-count requests in each window.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --measurement-request-count: " - "Indicates the minimum number of requests to be collected in each " - "measurement window when \"count_windows\" mode is used. This " - "mode can " - "be enabled using the --measurement-mode flag.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --concurrency-range : Determines the range of " - "concurrency levels covered by the perf_analyzer. The " - "perf_analyzer " - "will start from the concurrency level of 'start' and go till " - "'end' with a stride of 'step'. The default value of 'end' and " - "'step' are 1. If 'end' is not specified then perf_analyzer will " - "run for a single concurrency level determined by 'start'. If " - "'end' is set as 0, then the concurrency limit will be " - "incremented by 'step' till latency threshold is met. 
'end' and " - "--latency-threshold can not be both 0 simultaneously. 'end' can " - "not be 0 for sequence models while using asynchronous mode.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --periodic-concurrency-range : Determines the " - "range of concurrency levels in the similar but slightly " - "different manner as the --concurrency-range. Perf Analyzer will " - "start from the concurrency level of 'start' and increase by " - "'step' each time. Unlike --concurrency-range, the 'end' " - "indicates the *total* number of concurrency since the 'start' " - "(including) and will stop increasing once the cumulative number " - "of concurrent requests has reached the 'end'. The user can " - "specify *when* to periodically increase the concurrency level " - "using the --request-period option. The concurrency level will " - "periodically increase for every n-th response specified by " - "--request-period. Since this disables stability check in Perf " - "Analyzer and reports response timestamps only, the user must " - "provide --profile-export-file to specify where to dump all the " - "measured timestamps. The default values of 'start', 'end', and " - "'step' are 1.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --request-period : Indicates the number of responses that " - "each request must receive before new, concurrent requests are " - "sent when --periodic-concurrency-range is specified. Default " - "value is 10.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --request-parameter : Specifies a custom " - "parameter that can be sent to a Triton backend as part of the " - "request. For example, providing '--request-parameter " - "max_tokens:256:int' to the command line will set an additional " - "parameter 'max_tokens' of type 'int' to 256 as part of the " - "request. The --request-parameter may be specified multiple times " - "for different custom parameters.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --request-rate-range : Determines the range of " - "request rates for load generated by analyzer. This option can " - "take floating-point values. The search along the request rate " - "range is enabled only when using this option. If not specified, " - "then analyzer will search along the concurrency-range. The " - "perf_analyzer will start from the request rate of 'start' and go " - "till 'end' with a stride of 'step'. The default values of " - "'start', 'end' and 'step' are all 1.0. If 'end' is not specified " - "then perf_analyzer will run for a single request rate as " - "determined by 'start'. If 'end' is set as 0.0, then the request " - "rate will be incremented by 'step' till latency threshold is " - "met. 'end' and --latency-threshold can not be both 0 " - "simultaneously.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --request-distribution <\"poisson\"|\"constant\">: Specifies " - "the time interval distribution between dispatching inference " - "requests to the server. Poisson distribution closely mimics the " - "real-world work load on a server. This option is ignored if not " - "using --request-rate-range. By default, this option is set to be " - "constant.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --request-intervals: Specifies a path to a file containing time " - "intervals in microseconds. Each time interval should be in a new " - "line. The analyzer will try to maintain time intervals between " - "successive generated requests to be as close as possible in this " - "file. 
This option can be used to apply custom load to server " - "with a certain pattern of interest. The analyzer will loop " - "around the file if the duration of execution exceeds to that " - "accounted for by the intervals. This option can not be used with " - "--request-rate-range or --concurrency-range.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --binary-search: Enables the binary search on the specified " - "search range. This option requires 'start' and 'end' to be " - "expilicitly specified in the --concurrency-range or " - "--request-rate-range. When using this option, 'step' is more " - "like the precision. Lower the 'step', more the number of " - "iterations along the search path to find suitable convergence. " - "By default, linear search is used.", - 18) - << std::endl; - - std::cerr << FormatMessage( - " --num-of-sequences: Sets the number of concurrent " - "sequences for sequence models. This option is ignored when " - "--request-rate-range is not specified. By default, its " - "value is 4.", - 18) - << std::endl; - - std::cerr - << FormatMessage( - " --latency-threshold (-l): Sets the limit on the observed " - "latency. Analyzer will terminate the concurrency search once " - "the measured latency exceeds this threshold. By default, " - "latency threshold is set 0 and the perf_analyzer will run " - "for entire --concurrency-range.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --max-threads: Sets the maximum number of threads that will be " - "created for providing desired concurrency or request rate. " - "However, when running" - "in synchronous mode with concurrency-range having explicit 'end' " - "specification," - "this value will be ignored. Default is 4 if --request-rate-range " - "is specified otherwise default is 16.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --stability-percentage (-s): Indicates the allowed variation in " - "latency measurements when determining if a result is stable. The " - "measurement is considered as stable if the ratio of max / min " - "from the recent 3 measurements is within (stability percentage)% " - "in terms of both infer per second and latency. Default is " - "10(%).", - 18) - << std::endl; - std::cerr << FormatMessage( - " --max-trials (-r): Indicates the maximum number of " - "measurements for each concurrency level visited during " - "search. The perf analyzer will take multiple measurements " - "and report the measurement until it is stable. The perf " - "analyzer will abort if the measurement is still unstable " - "after the maximum number of measurements. The default " - "value is 10.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --percentile: Indicates the confidence value as a percentile " - "that will be used to determine if a measurement is stable. For " - "example, a value of 85 indicates that the 85th percentile " - "latency will be used to determine stability. The percentile will " - "also be reported in the results. The default is -1 indicating " - "that the average latency is used to determine stability", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --request-count: Specifies a total number of requests to " - "use for measurement. 
The default is 0, which means that there is " - "no request count and the measurement will proceed using windows " - "until stabilization is detected.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --serial-sequences: Enables serial sequence mode " - "where a maximum of one request is outstanding at a time " - "for any given sequence. The default is false.", - 18) - << std::endl; - std::cerr << std::endl; - std::cerr << "II. INPUT DATA OPTIONS: " << std::endl; - std::cerr << std::setw(9) << std::left - << " -b: " << FormatMessage("Batch size for each request sent.", 9) - << std::endl; - std::cerr - << FormatMessage( - " --input-data: Select the type of data that will be used " - "for input in inference requests. The available options are " - "\"zero\", \"random\", path to a directory or a json file. If the " - "option is path to a directory then the directory must " - "contain a binary/text file for each non-string/string input " - "respectively, named the same as the input. Each " - "file must contain the data required for that input for a batch-1 " - "request. Each binary file should contain the raw binary " - "representation of the input in row-major order for non-string " - "inputs. The text file should contain all strings needed by " - "batch-1, each in a new line, listed in row-major order. When " - "pointing to a json file, user must adhere to the format " - "described in the Performance Analyzer documentation. By " - "specifying json data users can control data used with every " - "request. Multiple data streams can be specified for a sequence " - "model and the analyzer will select a data stream in a " - "round-robin fashion for every new sequence. Multiple json files " - "can also be provided (--input-data json_file1 --input-data " - "json-file2 and so on) and the analyzer will append data streams " - "from each file. When using --service-kind=torchserve make sure " - "this option points to a json file. Default is \"random\".", - 18) - << std::endl; - std::cerr << FormatMessage( - " --shared-memory <\"system\"|\"cuda\"|\"none\">: Specifies " - "the type of the shared memory to use for input and output " - "data. Default is none.", - 18) - << std::endl; - - std::cerr - << FormatMessage( - " --output-shared-memory-size: The size in bytes of the shared " - "memory region to allocate per output tensor. Only needed when " - "one or more of the outputs are of string type and/or variable " - "shape. The value should be larger than the size of the largest " - "output tensor the model is expected to return. The analyzer will " - "use the following formula to calculate the total shared memory " - "to allocate: output_shared_memory_size * number_of_outputs * " - "batch_size. Defaults to 100KB.", - 18) - << std::endl; - - std::cerr << FormatMessage( - " --shape: The shape used for the specified input. The " - "argument must be specified as 'name:shape' where the shape " - "is a comma-separated list for dimension sizes, for example " - "'--shape input_name:1,2,3' indicate tensor shape [ 1, 2, 3 " - "]. --shape may be specified multiple times to specify " - "shapes for different inputs.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --sequence-length: Indicates the base length of a " - "sequence used for sequence models. A sequence with length " - "X will be composed of X requests to be sent as the " - "elements in the sequence. 
The actual length of the sequence" - "will be within +/- Y% of the base length, where Y defaults " - "to 20% and is customizable via " - "`--sequence-length-variation`. If sequence length is " - "unspecified and input data is provided, the sequence " - "length will be the number of inputs in the user-provided " - "input data. Default is 20.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --sequence-length-variation: The percentage variation in " - "length of sequences. This flag is only valid when " - "not using user-provided input data or when " - "`--sequence-length` is specified while using user-provided " - "input data. Default is 20.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --sequence-id-range : Determines the range of " - "sequence id used by the perf_analyzer. The perf_analyzer " - "will start from the sequence id of 'start' and go till " - "'end' (excluded). If 'end' is not specified then perf_analyzer " - "will use new sequence id without bounds. If 'end' is specified " - "and the concurrency setting may result in maintaining a number " - "of sequences more than the range of available sequence id, " - "perf analyzer will exit with error due to possible sequence id " - "collision. The default setting is start from sequence id 1 and " - "without bounds", - 18) - << std::endl; - std::cerr << FormatMessage( - " --string-length: Specifies the length of the random " - "strings to be generated by the analyzer for string input. " - "This option is ignored if --input-data points to a " - "directory. Default is 128.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --string-data: If provided, analyzer will use this string " - "to initialize string input buffers. The perf analyzer will " - "replicate the given string to build tensors of required " - "shape. --string-length will not have any effect. This " - "option is ignored if --input-data points to a directory.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --input-tensor-format=[binary|json]: Specifies Triton " - "inference request input tensor format. Only valid when " - "HTTP protocol is used. Default is 'binary'.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --output-tensor-format=[binary|json]: Specifies Triton " - "inference response output tensor format. Only valid when " - "HTTP protocol is used. Default is 'binary'.", - 18) - << std::endl; - std::cerr << std::endl; - std::cerr << "III. SERVER DETAILS: " << std::endl; - std::cerr << std::setw(38) << std::left << " -u: " - << FormatMessage( - "Specify URL to the server. When using triton default is " - "\"localhost:8000\" if using HTTP and \"localhost:8001\" " - "if using gRPC. When using tfserving default is " - "\"localhost:8500\". ", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left << " -i: " - << FormatMessage( - "The communication protocol to use. The available protocols " - "are gRPC and HTTP. Default is HTTP.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left << " --ssl-grpc-use-ssl: " - << FormatMessage( - "Bool (true|false) for whether " - "to use encrypted channel to the server. 
Default false.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left - << " --ssl-grpc-root-certifications-file: " - << FormatMessage( - "Path to file containing the " - "PEM encoding of the server root certificates.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left << " --ssl-grpc-private-key-file: " - << FormatMessage( - "Path to file containing the " - "PEM encoding of the client's private key.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left - << " --ssl-grpc-certificate-chain-file: " - << FormatMessage( - "Path to file containing the " - "PEM encoding of the client's certificate chain.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left << " --ssl-https-verify-peer: " - << FormatMessage( - "Number (0|1) to verify the " - "peer's SSL certificate. See " - "https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html for " - "the meaning of each value. Default is 1.", - 38) - << std::endl; - std::cerr - << std::setw(38) << std::left << " --ssl-https-verify-host: " - << FormatMessage( - "Number (0|1|2) to verify the " - "certificate's name against host. " - "See https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html for " - "the meaning of each value. Default is 2.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left - << " --ssl-https-ca-certificates-file: " - << FormatMessage( - "Path to Certificate Authority " - "(CA) bundle.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left - << " --ssl-https-client-certificate-file: " - << FormatMessage("Path to the SSL client certificate.", 38) - << std::endl; - std::cerr << std::setw(38) << std::left - << " --ssl-https-client-certificate-type: " - << FormatMessage( - "Type (PEM|DER) of the client " - "SSL certificate. Default is PEM.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left << " --ssl-https-private-key-file: " - << FormatMessage( - "Path to the private keyfile " - "for TLS and SSL client cert.", - 38) - << std::endl; - std::cerr << std::setw(38) << std::left << " --ssl-https-private-key-type: " - << FormatMessage( - "Type (PEM|DER) of the private " - "key file. Default is PEM.", - 38) - << std::endl; - std::cerr << std::endl; - std::cerr << "IV. OTHER OPTIONS: " << std::endl; - std::cerr - << std::setw(9) << std::left << " -f: " - << FormatMessage( - "The latency report will be stored in the file named by " - "this option. By default, the result is not recorded in a file.", - 9) - << std::endl; - std::cerr << std::setw(9) << std::left << " --profile-export-file: " - << FormatMessage( - "Specifies the path that the profile export will be " - "generated at. By default, the profile export will not be " - "generated.", - 9) - << std::endl; - std::cerr - << std::setw(9) << std::left << " -H: " - << FormatMessage( - "The header will be added to HTTP requests (ignored for GRPC " - "requests). The header must be specified as 'Header:Value'. -H " - "may be specified multiple times to add multiple headers.", - 9) - << std::endl; - std::cerr - << FormatMessage( - " --streaming: Enables the use of streaming API. This flag is " - "only valid with gRPC protocol. By default, it is set false.", - 18) - << std::endl; - - std::cerr << FormatMessage( - " --grpc-compression-algorithm: The compression algorithm " - "to be used by gRPC when sending request. Only supported " - "when grpc protocol is being used. The supported values are " - "none, gzip, and deflate. 
Default value is none.", - 18) - << std::endl; - - std::cerr - << FormatMessage( - " --trace-level: Specify a trace level. OFF to disable tracing, " - "TIMESTAMPS to trace timestamps, TENSORS to trace tensors. It " - "may be specified multiple times to trace multiple " - "information. Default is OFF.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --trace-rate: Set the trace sampling rate. Default is 1000.", 18) - << std::endl; - std::cerr << FormatMessage( - " --trace-count: Set the number of traces to be sampled. " - "If the value is -1, the number of traces to be sampled " - "will not be limited. Default is -1.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --log-frequency: Set the trace log frequency. If the " - "value is 0, Triton will only log the trace output to " - "the trace file when shutting down. Otherwise, Triton will log " - "the trace output to . when it collects the " - "specified number of traces. For example, if the log frequency " - "is 100, when Triton collects the 100-th trace, it logs the " - "traces to file .0, and when it collects the 200-th " - "trace, it logs the 101-th to the 200-th traces to file " - ".1. Default is 0.", - 18) - << std::endl; - - std::cerr << FormatMessage( - " --triton-server-directory: The Triton server install " - "path. Required by and only used when C API " - "is used (--service-kind=triton_c_api). " - "eg:--triton-server-directory=/opt/tritonserver.", - 18) - << std::endl; - std::cerr - << FormatMessage( - " --model-repository: The model repository of which the model is " - "loaded. Required by and only used when C API is used " - "(--service-kind=triton_c_api). " - "eg:--model-repository=/tmp/host/docker-data/model_unit_test.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --verbose-csv: The csv files generated by perf analyzer " - "will include additional information.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --collect-metrics: Enables collection of server-side " - "inference server metrics. Outputs metrics in the csv file " - "generated with the -f option. Must enable `--verbose-csv` " - "option to use the `--collect-metrics`.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --metrics-url: The URL to query for server-side inference " - "server metrics. Default is 'localhost:8002/metrics'.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --metrics-interval: How often in milliseconds, within " - "each measurement window, to query for server-side " - "inference server metrics. Default is 1000.", - 18) - << std::endl; - std::cerr << FormatMessage( - " --bls-composing-models: A comma separated list of all " - "BLS composing models (with optional model version number " - "after a colon for each) that may be called by the input " - "BLS model. 
For example, 'modelA:3,modelB' would specify " - "that modelA and modelB are composing models that may be " - "called by the input BLS model, and that modelA will use " - "version 3, while modelB's version is unspecified", - 18) - << std::endl; - throw pa::PerfAnalyzerException(GENERIC_ERROR); -} - -void -CLParser::PrintVersion() -{ - std::cerr << "Perf Analyzer Version " << VERSION << " (commit " << SHA << ")" - << std::endl; - exit(SUCCESS); -} - -void -CLParser::ParseCommandLine(int argc, char** argv) -{ - argc_ = argc; - argv_ = argv; - - // {name, has_arg, *flag, val} - static struct option long_options[] = { - {"streaming", no_argument, 0, 0}, - {"max-threads", required_argument, 0, 1}, - {"sequence-length", required_argument, 0, 2}, - {"percentile", required_argument, 0, 3}, - {"data-directory", required_argument, 0, 4}, - {"shape", required_argument, 0, 5}, - {"measurement-interval", required_argument, 0, 6}, - {"concurrency-range", required_argument, 0, 7}, - {"latency-threshold", required_argument, 0, 8}, - {"stability-percentage", required_argument, 0, 9}, - {"max-trials", required_argument, 0, 10}, - {"input-data", required_argument, 0, 11}, - {"string-length", required_argument, 0, 12}, - {"string-data", required_argument, 0, 13}, - {"async", no_argument, 0, 14}, - {"sync", no_argument, 0, 15}, - {"request-rate-range", required_argument, 0, 16}, - {"num-of-sequences", required_argument, 0, 17}, - {"binary-search", no_argument, 0, 18}, - {"request-distribution", required_argument, 0, 19}, - {"request-intervals", required_argument, 0, 20}, - {"shared-memory", required_argument, 0, 21}, - {"output-shared-memory-size", required_argument, 0, 22}, - {"service-kind", required_argument, 0, 23}, - {"model-signature-name", required_argument, 0, 24}, - {"grpc-compression-algorithm", required_argument, 0, 25}, - {"measurement-mode", required_argument, 0, 26}, - {"measurement-request-count", required_argument, 0, 27}, - {"triton-server-directory", required_argument, 0, 28}, - {"model-repository", required_argument, 0, 29}, - {"sequence-id-range", required_argument, 0, 30}, - {"ssl-grpc-use-ssl", no_argument, 0, 31}, - {"ssl-grpc-root-certifications-file", required_argument, 0, 32}, - {"ssl-grpc-private-key-file", required_argument, 0, 33}, - {"ssl-grpc-certificate-chain-file", required_argument, 0, 34}, - {"ssl-https-verify-peer", required_argument, 0, 35}, - {"ssl-https-verify-host", required_argument, 0, 36}, - {"ssl-https-ca-certificates-file", required_argument, 0, 37}, - {"ssl-https-client-certificate-file", required_argument, 0, 38}, - {"ssl-https-client-certificate-type", required_argument, 0, 39}, - {"ssl-https-private-key-file", required_argument, 0, 40}, - {"ssl-https-private-key-type", required_argument, 0, 41}, - {"verbose-csv", no_argument, 0, 42}, - {"enable-mpi", no_argument, 0, 43}, - {"trace-level", required_argument, 0, 44}, - {"trace-rate", required_argument, 0, 45}, - {"trace-count", required_argument, 0, 46}, - {"log-frequency", required_argument, 0, 47}, - {"collect-metrics", no_argument, 0, 48}, - {"metrics-url", required_argument, 0, 49}, - {"metrics-interval", required_argument, 0, 50}, - {"sequence-length-variation", required_argument, 0, 51}, - {"bls-composing-models", required_argument, 0, 52}, - {"serial-sequences", no_argument, 0, 53}, - {"input-tensor-format", required_argument, 0, 54}, - {"output-tensor-format", required_argument, 0, 55}, - {"version", no_argument, 0, 56}, - {"profile-export-file", required_argument, 0, 57}, - {"periodic-concurrency-range", 
required_argument, 0, 58}, - {"request-period", required_argument, 0, 59}, - {"request-parameter", required_argument, 0, 60}, - {"endpoint", required_argument, 0, 61}, - {"request-count", required_argument, 0, 62}, - {0, 0, 0, 0}}; - - // Parse commandline... - int opt; - while ((opt = getopt_long( - argc, argv, "vdazc:u:m:x:b:t:p:i:H:l:r:s:f:", long_options, - NULL)) != -1) { - try { - switch (opt) { - case 0: - params_->streaming = true; - break; - case 1: { - std::string max_threads{optarg}; - if (std::stoi(max_threads) > 0) { - params_->max_threads = std::stoull(max_threads); - params_->max_threads_specified = true; - } else { - Usage("Failed to parse --max-threads. The value must be > 0."); - } - break; - } - case 2: { - std::string sequence_length{optarg}; - if (std::stoi(sequence_length) > 0) { - params_->sequence_length = std::stoull(sequence_length); - } else { - std::cerr << "WARNING: The sequence length must be > 0. Perf " - "Analyzer will use default value if it is measuring " - "on sequence model." - << std::endl; - } - params_->sequence_length_specified = true; - break; - } - case 3: - params_->percentile = std::atoi(optarg); - break; - case 4: - params_->user_data.push_back(optarg); - break; - case 5: { - std::string arg = optarg; - auto colon_pos = arg.rfind(":"); - if (colon_pos == std::string::npos) { - Usage( - "Failed to parse --shape. There must be a colon after input " - "name."); - } - std::string name = arg.substr(0, colon_pos); - std::string shape_str = arg.substr(name.size() + 1); - size_t pos = 0; - std::vector shape; - while (pos != std::string::npos) { - size_t comma_pos = shape_str.find(",", pos); - int64_t dim; - if (comma_pos == std::string::npos) { - dim = std::stoll(shape_str.substr(pos, comma_pos)); - pos = comma_pos; - } else { - dim = std::stoll(shape_str.substr(pos, comma_pos - pos)); - pos = comma_pos + 1; - } - if (dim <= 0) { - Usage( - "Failed to parse --shape. The dimensions of input tensor " - "must be > 0."); - } - shape.emplace_back(dim); - } - - params_->input_shapes[name] = shape; - break; - } - case 6: - case 'p': { - std::string measurement_window_ms{optarg}; - if (std::stoi(measurement_window_ms) > 0) { - params_->measurement_window_ms = std::stoull(measurement_window_ms); - } else { - Usage( - "Failed to parse --measurement-interval (-p). The value must " - "be > 0 msec."); - } - break; - } - case 7: { - params_->using_concurrency_range = true; - std::string arg = optarg; - std::vector values{SplitString(arg)}; - if (values.size() > 3) { - Usage( - "Failed to parse --concurrency-range. The value does not match " - "."); - } - - for (size_t i = 0; i < values.size(); ++i) { - uint64_t val = std::stoull(values[i]); - if (i == 0) { - params_->concurrency_range.start = val; - } else if (i == 1) { - params_->concurrency_range.end = val; - } else if (i == 2) { - params_->concurrency_range.step = val; - } - } - break; - } - case 8: - case 'l': { - std::string latency_threshold_ms{optarg}; - if (std::stoi(latency_threshold_ms) == 0) { - params_->latency_threshold_ms = NO_LIMIT; - } else if (std::stoi(latency_threshold_ms) > 0) { - params_->latency_threshold_ms = std::stoull(latency_threshold_ms); - } else { - Usage( - "Failed to parse --latency-threshold (-l). 
The value must be " - ">= 0 msecs."); - } - break; - } - case 9: - case 's': { - std::string stability_threshold{optarg}; - if (std::stof(stability_threshold) >= 0.0) { - params_->stability_threshold = std::stof(optarg) / 100; - } else { - Usage( - "Failed to parse --stability-percentage (-s). The value must " - "be >= 0.0."); - } - break; - } - case 10: - case 'r': { - std::string max_trials{optarg}; - if (std::stoi(max_trials) > 0) { - params_->max_trials = std::stoull(max_trials); - } else { - Usage("Failed to parse --max-trials (-r). The value must be > 0."); - } - break; - } - case 11: { - std::string arg = optarg; - // Check whether the argument is a directory - if (IsDirectory(arg) || IsFile(arg)) { - params_->user_data.push_back(optarg); - } else if (arg.compare("zero") == 0) { - params_->zero_input = true; - } else if (arg.compare("random") == 0) { - break; - } else { - Usage( - "Failed to parse --input-data. Unsupported type provided: '" + - std::string(optarg) + - "'. The available options are 'zero', 'random', path to a " - "directory, or a json file."); - } - break; - } - case 12: { - std::string string_length{optarg}; - if (std::stoi(string_length) > 0) { - params_->string_length = std::stoull(string_length); - } else { - Usage("Failed to parse --string-length. The value must be > 0"); - } - break; - } - case 13: { - params_->string_data = optarg; - break; - } - case 14: - case 'a': { - params_->async = true; - break; - } - case 15: { - params_->forced_sync = true; - break; - } - case 16: { - params_->using_request_rate_range = true; - std::string arg = optarg; - size_t pos = 0; - int index = 0; - while (pos != std::string::npos) { - size_t colon_pos = arg.find(":", pos); - if (index > 2) { - Usage( - "Failed to parse --request-rate-range. The value does not " - "match ."); - } - if (colon_pos == std::string::npos) { - params_->request_rate_range[index] = - std::stod(arg.substr(pos, colon_pos)); - pos = colon_pos; - } else { - params_->request_rate_range[index] = - std::stod(arg.substr(pos, colon_pos - pos)); - pos = colon_pos + 1; - index++; - } - } - - break; - } - case 17: { - std::string num_of_sequences{optarg}; - if (std::stoi(num_of_sequences) > 0) { - params_->num_of_sequences = std::stoul(num_of_sequences); - } else { - Usage("Failed to parse --num-of-sequences. The value must be > 0."); - } - break; - } - case 18: { - params_->search_mode = SearchMode::BINARY; - break; - } - case 19: { - std::string arg = optarg; - if (arg.compare("poisson") == 0) { - params_->request_distribution = Distribution::POISSON; - } else if (arg.compare("constant") == 0) { - params_->request_distribution = Distribution::CONSTANT; - } else { - Usage( - "Failed to parse --request-distribution. Unsupported type " - "provided: '" + - std::string(optarg) + "'. Choices are 'posson' or 'constant'."); - } - break; - } - case 20: { - std::string request_intervals_file{optarg}; - if (IsFile(request_intervals_file)) { - params_->request_intervals_file = request_intervals_file; - params_->using_custom_intervals = true; - } else { - Usage( - "Failed to parse --request-intervals. 
The value must be a " - "valid file path"); - } - break; - } - case 21: { - std::string arg = optarg; - if (arg.compare("system") == 0) { - params_->shared_memory_type = - SharedMemoryType::SYSTEM_SHARED_MEMORY; - } else if (arg.compare("cuda") == 0) { -#ifdef TRITON_ENABLE_GPU - params_->shared_memory_type = SharedMemoryType::CUDA_SHARED_MEMORY; -#else - Usage( - "Cuda shared memory is not supported when " - "TRITON_ENABLE_GPU=0."); -#endif // TRITON_ENABLE_GPU - } else if (arg.compare("none") == 0) { - params_->shared_memory_type = SharedMemoryType::NO_SHARED_MEMORY; - } else { - Usage( - "Failed to parse --shared-memory. Unsupported type provided: " - "'" + - std::string(optarg) + - "'. The available options are 'system', 'cuda', or 'none'."); - } - break; - } - case 22: { - std::string output_shm_size{optarg}; - if (std::stoi(output_shm_size) >= 0) { - params_->output_shm_size = std::stoull(output_shm_size); - } else { - Usage( - "Failed to parse --output-shared-memory-size. The value must " - "be >= 0."); - } - break; - } - case 23: { - std::string arg = optarg; - if (arg.compare("triton") == 0) { - params_->kind = cb::TRITON; - } else if (arg.compare("tfserving") == 0) { - params_->kind = cb::TENSORFLOW_SERVING; - } else if (arg.compare("torchserve") == 0) { - params_->kind = cb::TORCHSERVE; - } else if (arg.compare("triton_c_api") == 0) { - params_->kind = cb::TRITON_C_API; - } else if (arg.compare("openai") == 0) { - params_->kind = cb::OPENAI; - } else { - Usage( - "Failed to parse --service-kind. Unsupported type provided: '" + - std::string{optarg} + - "'. The available options are 'triton', 'tfserving', " - "'torchserve', or 'triton_c_api'."); - } - break; - } - case 24: - params_->model_signature_name = optarg; - break; - case 25: { - std::string arg = optarg; - if (arg.compare("none") == 0) { - params_->compression_algorithm = cb::COMPRESS_NONE; - } else if (arg.compare("deflate") == 0) { - params_->compression_algorithm = cb::COMPRESS_DEFLATE; - } else if (arg.compare("gzip") == 0) { - params_->compression_algorithm = cb::COMPRESS_GZIP; - } else { - Usage( - "Failed to parse --grpc-compression-algorithm. Unsupported " - "type provided: '" + - arg + - "'. The available options are 'gzip', 'deflate', or 'none'."); - } - params_->using_grpc_compression = true; - break; - } - case 26: { - std::string arg = optarg; - if (arg.compare("time_windows") == 0) { - params_->measurement_mode = MeasurementMode::TIME_WINDOWS; - } else if (arg.compare("count_windows") == 0) { - params_->measurement_mode = MeasurementMode::COUNT_WINDOWS; - } else { - Usage( - "Failed to parse --measurement-mode. Unsupported type " - "provided: '" + - arg + - "'. The available options are 'time_windows' or " - "'count_windows'."); - } - break; - } - case 27: { - std::string request_count{optarg}; - if (std::stoi(request_count) > 0) { - params_->measurement_request_count = std::stoull(request_count); - } else { - Usage( - "Failed to parse --measurement-request-count. The value must " - "be > 0."); - } - break; - } - case 28: { - params_->triton_server_path = optarg; - break; - } - case 29: { - params_->model_repository_path = optarg; - break; - } - case 30: { - std::string arg = optarg; - int64_t start_id; - int64_t end_id; - size_t pos = 0; - int index = 0; - while (pos != std::string::npos) { - size_t colon_pos = arg.find(":", pos); - if (index > 1) { - Usage( - "Failed to parse --sequence-id-range. 
The value does not " - "match ."); - } - if (colon_pos == std::string::npos) { - std::string sequence_id{arg.substr(pos, colon_pos)}; - if (index == 0) { - start_id = std::stoi(sequence_id); - } else { - end_id = std::stoi(sequence_id); - } - pos = colon_pos; - } else { - std::string sequence_id{arg.substr(pos, colon_pos - pos)}; - start_id = std::stoi(sequence_id); - pos = colon_pos + 1; - index++; - } - } - - // Check for invalid inputs - if (start_id < 0 || end_id < 0) { - Usage( - "Failed to parse --sequence-id-range. The range values must be " - ">= 0."); - } else if (start_id > end_id) { - Usage( - "Failed to parse --sequence-id-range. The 'end' value must be " - "greater than 'start' value."); - } - - if (index == 0) { // Only start ID is given - params_->start_sequence_id = start_id; - } else { - params_->start_sequence_id = start_id; - params_->sequence_id_range = end_id - start_id; - } - break; - } - case 31: { - params_->ssl_options.ssl_grpc_use_ssl = true; - break; - } - case 32: { - if (IsFile(optarg)) { - params_->ssl_options.ssl_grpc_root_certifications_file = optarg; - } else { - Usage( - "Failed to parse --ssl-grpc-root-certifications-file. The " - "value must be a valid file path."); - } - break; - } - case 33: { - if (IsFile(optarg)) { - params_->ssl_options.ssl_grpc_private_key_file = optarg; - } else { - Usage( - "Failed to parse --ssl-grpc-private-key-file. The value must " - "be a valid file path."); - } - break; - } - case 34: { - if (IsFile(optarg)) { - params_->ssl_options.ssl_grpc_certificate_chain_file = optarg; - } else { - Usage( - "Failed to parse --ssl-grpc-certificate-chain-file. The value " - "must be a valid file path."); - } - break; - } - case 35: { - if (std::atol(optarg) == 0 || std::atol(optarg) == 1) { - params_->ssl_options.ssl_https_verify_peer = std::atol(optarg); - } else { - Usage( - "Failed to parse --ssl-https-verify-peer. The value must be " - "either 0 or 1."); - } - break; - } - case 36: { - if (std::atol(optarg) == 0 || std::atol(optarg) == 1 || - std::atol(optarg) == 2) { - params_->ssl_options.ssl_https_verify_host = std::atol(optarg); - } else { - Usage( - "Failed to parse --ssl-https-verify-host. The value must be " - "either 0, 1, or 2."); - } - break; - } - case 37: { - if (IsFile(optarg)) { - params_->ssl_options.ssl_https_ca_certificates_file = optarg; - } else { - Usage( - "Failed to parse --ssl-https-ca-certificates-file. The value " - "must be a valid file path."); - } - break; - } - case 38: { - if (IsFile(optarg)) { - params_->ssl_options.ssl_https_client_certificate_file = optarg; - } else { - Usage( - "Failed to parse --ssl-https-client-certificate-file. The " - "value must be a valid file path."); - } - break; - } - case 39: { - if (std::string(optarg) == "PEM" || std::string(optarg) == "DER") { - params_->ssl_options.ssl_https_client_certificate_type = optarg; - } else { - Usage( - "Failed to parse --ssl-https-client-certificate-type. " - "Unsupported type provided: '" + - std::string{optarg} + - "'. The available options are 'PEM' or 'DER'."); - } - break; - } - case 40: { - if (IsFile(optarg)) { - params_->ssl_options.ssl_https_private_key_file = optarg; - } else { - Usage( - "Failed to parse --ssl-https-private-key-file. The value must " - "be a valid file path."); - } - break; - } - case 41: { - if (std::string(optarg) == "PEM" || std::string(optarg) == "DER") { - params_->ssl_options.ssl_https_private_key_type = optarg; - } else { - Usage( - "Failed to parse --ssl-https-private-key-type. 
Unsupported " - "type provided: '" + - std::string{optarg} + - "'. The available options are 'PEM' or 'DER'."); - } - break; - } - case 42: { - params_->verbose_csv = true; - break; - } - case 43: { - params_->enable_mpi = true; - break; - } - case 44: { - std::string trace_level{optarg}; - if (trace_level == "OFF" || trace_level == "TIMESTAMPS" || - trace_level == "TENSORS") { - params_->trace_options["trace_level"] = {trace_level}; - } else { - Usage( - "Failed to parse --trace-level. Unsupported type provided: '" + - trace_level + - "'. The available options are 'OFF', 'TIMESTAMPS', or " - "'TENSORS'."); - } - break; - } - case 45: { - params_->trace_options["trace_rate"] = {optarg}; - break; - } - case 46: { - std::string trace_count{optarg}; - if (std::stoi(trace_count) >= -1) { - params_->trace_options["trace_count"] = {trace_count}; - } else { - Usage( - "Failed to parse --trace-count. The value must be >= 0 or set " - "to -1 (default)."); - } - break; - } - case 47: { - std::string log_frequency{optarg}; - if (std::stoi(log_frequency) >= 0) { - params_->trace_options["log_frequency"] = {log_frequency}; - } else { - Usage("Failed to parse --log-frequency. The value must be >= 0."); - } - break; - } - case 48: { - params_->should_collect_metrics = true; - break; - } - case 49: { - params_->metrics_url = optarg; - params_->metrics_url_specified = true; - break; - } - case 50: { - std::string metrics_interval_ms{optarg}; - if (std::stoi(metrics_interval_ms) > 0) { - params_->metrics_interval_ms = std::stoull(metrics_interval_ms); - params_->metrics_interval_ms_specified = true; - } else { - Usage( - "Failed to parse --metrics-interval. The value must be > 0 " - "msecs."); - } - break; - } - case 51: { - params_->sequence_length_variation = std::stod(optarg); - break; - } - case 52: { - std::string arg = optarg; - - // Remove all spaces in the string - arg.erase( - std::remove_if(arg.begin(), arg.end(), ::isspace), arg.end()); - - std::stringstream ss(arg); - while (ss.good()) { - std::string model_name; - std::string model_version{""}; - std::string tmp_model_name; - - getline(ss, tmp_model_name, ','); - - size_t colon_pos = tmp_model_name.find(":"); - - if (colon_pos == std::string::npos) { - model_name = tmp_model_name; - } else { - model_name = tmp_model_name.substr(0, colon_pos); - model_version = tmp_model_name.substr(colon_pos + 1); - } - - params_->bls_composing_models.push_back( - {model_name, model_version}); - } - break; - } - case 53: { - params_->serial_sequences = true; - break; - } - case 54: { - cb::TensorFormat input_tensor_format{ParseTensorFormat(optarg)}; - if (input_tensor_format == cb::TensorFormat::UNKNOWN) { - Usage( - "Failed to parse --input-tensor-format. Unsupported type " - "provided: '" + - std::string{optarg} + - "'. The available options are 'binary' or 'json'."); - } - params_->input_tensor_format = input_tensor_format; - break; - } - case 55: { - cb::TensorFormat output_tensor_format{ParseTensorFormat(optarg)}; - if (output_tensor_format == cb::TensorFormat::UNKNOWN) { - Usage( - "Failed to parse --output-tensor-format. Unsupported type " - "provided: '" + - std::string{optarg} + - "'. The available options are 'binary' or 'json'."); - } - params_->output_tensor_format = output_tensor_format; - break; - } - case 56: { - PrintVersion(); - break; - } - case 57: { - std::string profile_export_file{optarg}; - if (IsFile(profile_export_file) || IsDirectory(profile_export_file)) { - Usage( - "Failed to parse --profile-export-file. 
Path must not already " - "exist."); - } - params_->profile_export_file = profile_export_file; - break; - } - case 58: { - params_->is_using_periodic_concurrency_mode = true; - std::string arg = optarg; - std::vector values{SplitString(arg)}; - if (values.size() < 2) { - Usage( - "Failed to parse --periodic-concurrency-range. Both " - "and values must be provided."); - } else if (values.size() > 3) { - Usage( - "Failed to parse --periodic-concurrency-range. The value does " - "not match ."); - } - - for (size_t i = 0; i < values.size(); ++i) { - uint64_t val = std::stoull(values[i]); - if (i == 0) { - params_->periodic_concurrency_range.start = val; - } else if (i == 1) { - params_->periodic_concurrency_range.end = val; - } else if (i == 2) { - params_->periodic_concurrency_range.step = val; - } - } - - Range range{params_->periodic_concurrency_range}; - if (range.step == 0) { - Usage( - "Failed to parse --periodic-concurrency-range. The " - "value must be > 0."); - } else if (range.start > range.end) { - Usage( - "Failed to parse --periodic-concurrency-range. The " - "must be <= ."); - } else if ((range.end - range.start) % range.step != 0) { - Usage( - "Failed to parse --periodic-concurrency-range. The " - "value must be a factor of the range size ( - )."); - } - break; - } - case 59: { - std::string request_period{optarg}; - if (std::stoi(request_period) > 0) { - params_->request_period = std::stoull(request_period); - } else { - Usage("Failed to parse --request-period. The value must be > 0"); - } - break; - } - case 60: { - std::string arg = optarg; - std::vector values{SplitString(arg)}; - if (values.size() != 3) { - Usage( - "Failed to parse --request-parameter. The value does not match " - "."); - } - - std::for_each(values.begin(), values.end(), ToLowerCase); - std::string name{values[0]}; - std::string value{values[1]}; - std::string type{values[2]}; - - cb::RequestParameter param; - param.name = name; - param.value = value; - param.type = type; - params_->request_parameters[name] = param; - break; - } - case 61: { - params_->endpoint = optarg; - break; - } - case 62: { - if (std::stoi(optarg) < 0) { - Usage("Failed to parse --request-count. The value must be > 0."); - } - params_->request_count = std::stoi(optarg); - break; - } - case 'v': - params_->extra_verbose = params_->verbose; - params_->verbose = true; - break; - case 'z': - params_->zero_input = true; - break; - case 'd': - params_->using_old_options = true; - params_->dynamic_concurrency_mode = true; - break; - case 'u': - params_->url_specified = true; - params_->url = optarg; - break; - case 'm': - params_->model_name = optarg; - break; - case 'x': - params_->model_version = optarg; - break; - case 'b': { - std::string batch_size{optarg}; - if (std::stoi(batch_size) > 0) { - params_->batch_size = std::stoull(batch_size); - params_->using_batch_size = true; - } else { - Usage("Failed to parse -b (batch size). 
The value must be > 0."); - } - break; - } - case 't': - params_->using_old_options = true; - params_->concurrent_request_count = std::atoi(optarg); - break; - case 'i': - params_->protocol = ParseProtocol(optarg); - break; - case 'H': { - std::string arg = optarg; - std::string header = arg.substr(0, arg.find(":")); - (*params_->http_headers)[header] = arg.substr(header.size() + 1); - break; - } - case 'c': - params_->using_old_options = true; - params_->max_concurrency = std::atoi(optarg); - break; - case 'f': - params_->filename = optarg; - break; - case '?': - Usage(); - break; - } - } - catch (const std::invalid_argument& ia) { - if (opt >= 'A') { // short options - Usage( - "Failed to parse -" + std::string{(char)opt} + - ". Invalid value provided: " + std::string{optarg}); - } else { - Usage( - "Failed to parse --" + std::string{long_options[opt].name} + - ". Invalid value provided: " + std::string{optarg}); - } - } - } - - params_->mpi_driver = std::shared_ptr{ - std::make_shared(params_->enable_mpi)}; - params_->mpi_driver->MPIInit(&argc, &argv); - - if (!params_->url_specified && - (params_->protocol == cb::ProtocolType::GRPC)) { - if (params_->kind == cb::BackendKind::TRITON) { - params_->url = "localhost:8001"; - } else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) { - params_->url = "localhost:8500"; - } - } - - // Overriding the max_threads default for request_rate search - if (!params_->max_threads_specified && params_->targeting_concurrency()) { - params_->max_threads = - std::max(DEFAULT_MAX_THREADS, params_->concurrency_range.end); - } - - if (params_->using_custom_intervals) { - // Will be using user-provided time intervals, hence no control variable. - params_->search_mode = SearchMode::NONE; - } - - // When the request-count feature is enabled, override the measurement mode to - // be count windows with a window size of the requested count - if (params_->request_count) { - params_->measurement_mode = MeasurementMode::COUNT_WINDOWS; - params_->measurement_request_count = params_->request_count; - } -} - -void -CLParser::VerifyOptions() -{ - if (params_->model_name.empty()) { - Usage("Failed to parse -m (model name). The value must be specified."); - } - if (params_->concurrency_range.start <= 0 || - params_->concurrent_request_count < 0) { - Usage("The start of the search range must be > 0"); - } - if (params_->request_rate_range[SEARCH_RANGE::kSTART] <= 0) { - Usage( - "Failed to parse --request-rate-range. The start of the search range " - "must be > 0."); - } - if (params_->protocol == cb::ProtocolType::UNKNOWN) { - Usage( - "Failed to parse -i (protocol). The value should be either HTTP or " - "gRPC."); - } - if (params_->streaming && (params_->protocol != cb::ProtocolType::GRPC)) { - Usage("Streaming is only allowed with gRPC protocol."); - } - if (params_->using_grpc_compression && - (params_->protocol != cb::ProtocolType::GRPC)) { - Usage("Using compression algorithm is only allowed with gRPC protocol."); - } - if (params_->sequence_length_variation < 0.0) { - Usage( - "Failed to parse --sequence-length-variation. The value must be >= " - "0.0."); - } - if (params_->start_sequence_id == 0) { - params_->start_sequence_id = 1; - std::cerr << "WARNING: using an invalid start sequence id. Perf Analyzer" - << " will use default value if it is measuring on sequence model." - << std::endl; - } - if (params_->percentile != -1 && - (params_->percentile > 99 || params_->percentile < 1)) { - Usage( - "Failed to parse --percentile. 
The value must be -1 for not reporting " - "or in range (0, 100)."); - } - if (params_->zero_input && !params_->user_data.empty()) { - Usage("The -z flag cannot be set when --data-directory is provided."); - } - if (params_->async && params_->forced_sync) { - Usage("Cannot specify --async and --sync simultaneously."); - } - - if (params_->using_concurrency_range && params_->using_old_options) { - Usage("Cannot use deprecated options with --concurrency-range."); - } else if (params_->using_old_options) { - if (params_->dynamic_concurrency_mode) { - params_->concurrency_range.end = params_->max_concurrency; - } - params_->concurrency_range.start = params_->concurrent_request_count; - } - - if (params_->using_request_rate_range && params_->using_old_options) { - Usage("Cannot use concurrency options with --request-rate-range."); - } - - std::vector load_modes{ - params_->is_using_periodic_concurrency_mode, - params_->using_concurrency_range, params_->using_request_rate_range, - params_->using_custom_intervals}; - if (std::count(load_modes.begin(), load_modes.end(), true) > 1) { - Usage( - "Cannot specify more then one inference load mode. Please choose only " - "one of the following modes: --concurrency-range, " - "--periodic-concurrency-range, --request-rate-range, or " - "--request-intervals."); - } - - if (params_->is_using_periodic_concurrency_mode && !params_->streaming) { - Usage( - "The --periodic-concurrency-range option requires bi-directional gRPC " - "streaming."); - } - - if (params_->is_using_periodic_concurrency_mode && - (params_->profile_export_file == "")) { - Usage( - "Must provide --profile-export-file when using the " - "--periodic-concurrency-range option."); - } - - if (params_->is_using_periodic_concurrency_mode) { - if (params_->periodic_concurrency_range.end == pa::NO_LIMIT) { - std::cerr - << "WARNING: The maximum attainable concurrency will be limited by " - "max_threads specification." - << std::endl; - params_->periodic_concurrency_range.end = params_->max_threads; - } else { - if (params_->max_threads_specified) { - std::cerr << "WARNING: Overriding max_threads specification to ensure " - "requested concurrency range." 
- << std::endl; - } - params_->max_threads = std::max( - params_->max_threads, params_->periodic_concurrency_range.end); - } - } - - if (params_->request_parameters.size() > 0 && - params_->protocol != cb::ProtocolType::GRPC) { - Usage( - "The --request-parameter option is currently only supported by gRPC " - "protocol."); - } - - if (params_->using_request_rate_range && params_->mpi_driver->IsMPIRun() && - (params_->request_rate_range[SEARCH_RANGE::kEND] != 1.0 || - params_->request_rate_range[SEARCH_RANGE::kSTEP] != 1.0)) { - Usage("Cannot specify --request-rate-range when in multi-model mode."); - } - - if (params_->using_custom_intervals && params_->using_old_options) { - Usage("Cannot use deprecated options with --request-intervals."); - } - - if ((params_->using_custom_intervals) && - (params_->using_request_rate_range || params_->using_concurrency_range)) { - Usage( - "Cannot use --concurrency-range or --request-rate-range " - "along with --request-intervals."); - } - - if (params_->using_concurrency_range && params_->mpi_driver->IsMPIRun() && - (params_->concurrency_range.end != 1 || - params_->concurrency_range.step != 1)) { - Usage("Cannot specify --concurrency-range when in multi-model mode."); - } - - if (((params_->concurrency_range.end == NO_LIMIT) || - (params_->request_rate_range[SEARCH_RANGE::kEND] == - static_cast(NO_LIMIT))) && - (params_->latency_threshold_ms == NO_LIMIT)) { - Usage( - "The end of the search range and the latency limit can not be both 0 " - "(or 0.0) simultaneously"); - } - - if (((params_->concurrency_range.end == NO_LIMIT) || - (params_->request_rate_range[SEARCH_RANGE::kEND] == - static_cast(NO_LIMIT))) && - (params_->search_mode == SearchMode::BINARY)) { - Usage("The end of the range can not be 0 (or 0.0) for binary search mode."); - } - - if ((params_->search_mode == SearchMode::BINARY) && - (params_->latency_threshold_ms == NO_LIMIT)) { - Usage("The --latency-threshold cannot be 0 for binary search mode."); - } - - if (((params_->concurrency_range.end < params_->concurrency_range.start) || - (params_->request_rate_range[SEARCH_RANGE::kEND] < - params_->request_rate_range[SEARCH_RANGE::kSTART])) && - (params_->search_mode == SearchMode::BINARY)) { - Usage( - "The end of the range can not be less than start of the range for " - "binary search mode."); - } - - if (params_->request_count != 0) { - if (params_->using_concurrency_range) { - if (params_->request_count < params_->concurrency_range.start) { - Usage("request-count can not be less than concurrency"); - } - if (params_->concurrency_range.start < params_->concurrency_range.end) { - Usage( - "request-count not supported with multiple concurrency values in " - "one run"); - } - } - if (params_->using_request_rate_range) { - if (params_->request_count < - static_cast(params_->request_rate_range[0])) { - Usage("request-count can not be less than request-rate"); - } - if (params_->request_rate_range[SEARCH_RANGE::kSTART] < - params_->request_rate_range[SEARCH_RANGE::kEND]) { - Usage( - "request-count not supported with multiple request-rate values in " - "one run"); - } - } - } - - if (params_->kind == cb::TENSORFLOW_SERVING) { - if (params_->protocol != cb::ProtocolType::GRPC) { - Usage( - "perf_analyzer supports only grpc protocol for TensorFlow Serving."); - } else if (params_->streaming) { - Usage("perf_analyzer does not support streaming for TensorFlow Serving."); - } else if (params_->async) { - Usage("perf_analyzer does not support async API for TensorFlow Serving."); - } else if 
(!params_->using_batch_size) { - params_->batch_size = 0; - } - } else if (params_->kind == cb::TORCHSERVE) { - if (params_->user_data.empty()) { - Usage( - "--input-data should be provided with a json file with " - "input data for torchserve."); - } - } - - if (params_->kind == cb::BackendKind::TRITON_C_API) { - if (params_->triton_server_path.empty()) { - Usage( - "--triton-server-path should not be empty when using " - "service-kind=triton_c_api."); - } - - if (params_->model_repository_path.empty()) { - Usage( - "--model-repository should not be empty when using " - "service-kind=triton_c_api."); - } - - if (params_->async) { - Usage( - "Async mode is not supported by triton_c_api service " - "kind."); - } - - params_->protocol = cb::ProtocolType::UNKNOWN; - } - - if (params_->kind == cb::BackendKind::OPENAI) { - if (params_->user_data.empty()) { - Usage("Must supply --input-data for OpenAI service kind."); - } - if (params_->endpoint.empty()) { - Usage( - "Must supply --endpoint for OpenAI service kind. For example, " - "\"v1/chat/completions\"."); - } - if (!params_->async) { - Usage("Only async mode is currently supported for OpenAI service-kind"); - } - if (params_->batch_size != 1) { - Usage("Batching is not currently supported with OpenAI service-kind"); - } - } - - if (params_->should_collect_metrics && - params_->kind != cb::BackendKind::TRITON) { - Usage( - "Server-side metric collection is only supported with Triton client " - "backend."); - } - - if (params_->metrics_url_specified && - params_->should_collect_metrics == false) { - Usage( - "Must specify --collect-metrics when using the --metrics-url option."); - } - - if (params_->metrics_interval_ms_specified && - params_->should_collect_metrics == false) { - Usage( - "Must specify --collect-metrics when using the --metrics-interval " - "option."); - } - - if (params_->should_collect_metrics && !params_->metrics_url_specified) { - // Update the default metrics URL to be associated with the input URL - // instead of localhost - // - size_t colon_pos = params_->url.find(':'); - if (colon_pos != std::string::npos) { - params_->metrics_url = - params_->url.substr(0, colon_pos) + ":8002/metrics"; - } - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h deleted file mode 100644 index 461e24e2d..000000000 --- a/src/c++/perf_analyzer/command_line_parser.h +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
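The metrics-URL fallback in VerifyOptions above only swaps the port: everything before the first ':' of --url is kept and ":8002/metrics" is appended, otherwise the default of "localhost:8002/metrics" stands. A minimal standalone sketch of that substitution; the helper name is illustrative and not part of the removed parser:

#include <iostream>
#include <string>

// Illustrative helper mirroring the removed fallback in VerifyOptions():
// reuse the host portion of --url and point at Triton's default metrics port.
std::string DeriveMetricsUrl(const std::string& server_url)
{
  size_t colon_pos = server_url.find(':');
  if (colon_pos == std::string::npos) {
    return "localhost:8002/metrics";  // default kept when --url has no port
  }
  return server_url.substr(0, colon_pos) + ":8002/metrics";
}

int main()
{
  std::cout << DeriveMetricsUrl("myhost:8000") << std::endl;  // myhost:8002/metrics
}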
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -#pragma once - -#include -#include -#include -#include - -#include "constants.h" -#include "mpi_utils.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -enum SEARCH_RANGE { kSTART = 0, kEND = 1, kSTEP = 2 }; - -// Perf Analyzer command line parameters. -// PAParams are used to initialize PerfAnalyzer and track configuration -// -struct PerfAnalyzerParameters { - bool verbose = false; - bool extra_verbose = false; - bool streaming = false; - size_t max_threads = 4; - bool max_threads_specified = false; - size_t sequence_length = 20; // average length of a sentence - bool sequence_length_specified = false; - double sequence_length_variation = 20.0; - int32_t percentile = -1; - std::vector user_data; - std::unordered_map> input_shapes; - std::vector bls_composing_models; - uint64_t measurement_window_ms = 5000; - bool using_concurrency_range = false; - Range concurrency_range{1, 1, 1}; - std::unordered_map request_parameters; - uint64_t latency_threshold_ms = NO_LIMIT; - double stability_threshold = 0.1; - size_t max_trials = 10; - size_t request_count = 0; - bool zero_input = false; - size_t string_length = 128; - std::string string_data; - bool async = false; - bool forced_sync = false; - bool using_request_rate_range = false; - double request_rate_range[3] = {1.0, 1.0, 1.0}; - uint32_t num_of_sequences = 4; - bool serial_sequences = false; - SearchMode search_mode = SearchMode::LINEAR; - Distribution request_distribution = Distribution::CONSTANT; - bool using_custom_intervals = false; - std::string request_intervals_file{""}; - SharedMemoryType shared_memory_type = NO_SHARED_MEMORY; - size_t output_shm_size = 100 * 1024; - clientbackend::BackendKind kind = clientbackend::BackendKind::TRITON; - std::string model_signature_name{"serving_default"}; - bool using_grpc_compression = false; - clientbackend::GrpcCompressionAlgorithm compression_algorithm = - clientbackend::GrpcCompressionAlgorithm::COMPRESS_NONE; - MeasurementMode measurement_mode = MeasurementMode::TIME_WINDOWS; - uint64_t measurement_request_count = 50; - std::string triton_server_path = "/opt/tritonserver"; - std::string model_repository_path; - uint64_t start_sequence_id = 1; - uint64_t sequence_id_range = UINT32_MAX; - clientbackend::SslOptionsBase ssl_options; // gRPC and HTTP SSL options - - // Verbose csv option for including additional information - bool verbose_csv = false; - - // Enable MPI option for using MPI functionality with multi-model mode. 
- bool enable_mpi = false; - std::map> trace_options; - bool using_old_options = false; - bool dynamic_concurrency_mode = false; - bool url_specified = false; - std::string url{"localhost:8000"}; - std::string endpoint{""}; - std::string model_name; - std::string model_version; - uint64_t batch_size = 1; - bool using_batch_size = false; - int32_t concurrent_request_count = 1; - clientbackend::ProtocolType protocol = clientbackend::ProtocolType::HTTP; - std::shared_ptr http_headers{ - new clientbackend::Headers()}; - size_t max_concurrency = 0; - std::string filename{""}; - std::shared_ptr mpi_driver; - std::string memory_type{"system"}; // currently not used, to be removed - - // Enable collection of server-side metrics from inference server. - bool should_collect_metrics{false}; - - // The URL to query for server-side inference server metrics. - std::string metrics_url{"localhost:8002/metrics"}; - bool metrics_url_specified{false}; - - // How often, within each measurement window, to query for server-side - // inference server metrics. - uint64_t metrics_interval_ms{1000}; - bool metrics_interval_ms_specified{false}; - - // Return true if targeting concurrency - // - bool targeting_concurrency() const - { - return ( - using_concurrency_range || using_old_options || - !(using_request_rate_range || using_custom_intervals || - is_using_periodic_concurrency_mode)); - } - - // Sets the threshold for PA client overhead. - // Overhead is defined as the percentage of time when PA is doing work and - // requests are not outstanding to the triton server. If the overhead - // percentage exceeds the threshold, a warning is displayed. - // - double overhead_pct_threshold{50.0}; - - // Triton inference request input tensor format. - cb::TensorFormat input_tensor_format{cb::TensorFormat::BINARY}; - - // Triton inference response output tensor format. - cb::TensorFormat output_tensor_format{cb::TensorFormat::BINARY}; - - // The profile export file path. - std::string profile_export_file{""}; - - bool is_using_periodic_concurrency_mode{false}; - Range periodic_concurrency_range{1, 1, 1}; - uint64_t request_period{10}; -}; - -using PAParamsPtr = std::shared_ptr; - -class CLParser { - public: - CLParser() : params_(new PerfAnalyzerParameters{}) {} - - // Parse command line arguments into a parameters struct - // - PAParamsPtr Parse(int argc, char** argv); - - private: - char** argv_; - int argc_; - PAParamsPtr params_; - - std::string FormatMessage(std::string str, int offset) const; - virtual void Usage(const std::string& msg = std::string()); - void PrintVersion(); - void ParseCommandLine(int argc, char** argv); - void VerifyOptions(); -}; -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/concurrency_ctx_id_tracker.h b/src/c++/perf_analyzer/concurrency_ctx_id_tracker.h deleted file mode 100644 index 9699fa30e..000000000 --- a/src/c++/perf_analyzer/concurrency_ctx_id_tracker.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "base_queue_ctx_id_tracker.h" - -namespace triton { namespace perfanalyzer { - -// Context ID Tracker that always returns context 0, but ensures that only X -// requests are outstanding at a time -// -class ConcurrencyCtxIdTracker : public BaseQueueCtxIdTracker { - public: - ConcurrencyCtxIdTracker() = default; - void Reset(size_t count) override - { - Clear(); - - for (size_t i = 0; i < count; ++i) { - free_ctx_ids_.push(0); - } - } -}; - -}}; // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/concurrency_manager.cc b/src/c++/perf_analyzer/concurrency_manager.cc deleted file mode 100644 index 283861846..000000000 --- a/src/c++/perf_analyzer/concurrency_manager.cc +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
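The ConcurrencyCtxIdTracker removed above caps in-flight work by pre-loading a queue with one token per allowed request, and every token maps to context 0. Its base class (BaseQueueCtxIdTracker) is not part of this diff, so the sketch below is a self-contained approximation of that queue behaviour rather than the exact removed class:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <queue>

// Approximation of a queue-backed context-id tracker: Reset() loads one
// token per allowed concurrent request, Get() consumes a token before a
// request is sent, and Restore() returns it once the response arrives.
class SimpleConcurrencyCtxIdTracker {
 public:
  void Reset(size_t count)
  {
    free_ctx_ids_ = {};
    for (size_t i = 0; i < count; ++i) {
      free_ctx_ids_.push(0);  // every request shares context 0
    }
  }
  bool IsAvailable() const { return !free_ctx_ids_.empty(); }
  uint32_t Get()
  {
    uint32_t id = free_ctx_ids_.front();
    free_ctx_ids_.pop();
    return id;
  }
  void Restore(uint32_t id) { free_ctx_ids_.push(id); }

 private:
  std::queue<uint32_t> free_ctx_ids_;
};

int main()
{
  SimpleConcurrencyCtxIdTracker tracker;
  tracker.Reset(4);
  int sent = 0;
  while (tracker.IsAvailable()) {
    tracker.Get();
    ++sent;  // at most 4 requests may be outstanding at once
  }
  std::cout << "outstanding slots used: " << sent << std::endl;  // 4
}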
- -#include "concurrency_manager.h" - -#include - -namespace triton { namespace perfanalyzer { - -ConcurrencyManager::~ConcurrencyManager() -{ - // The destruction of derived class should wait for all the request generator - // threads to finish - StopWorkerThreads(); -} - -cb::Error -ConcurrencyManager::Create( - const bool async, const bool streaming, const int32_t batch_size, - const size_t max_threads, const size_t max_concurrency, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - std::unique_ptr* manager, - const std::unordered_map& - request_parameters) -{ - std::unique_ptr local_manager(new ConcurrencyManager( - async, streaming, batch_size, max_threads, max_concurrency, - shared_memory_type, output_shm_size, parser, factory, - request_parameters)); - - *manager = std::move(local_manager); - - return cb::Error::Success; -} - -ConcurrencyManager::ConcurrencyManager( - const bool async, const bool streaming, const int32_t batch_size, - const size_t max_threads, const size_t max_concurrency, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters) - : LoadManager( - async, streaming, batch_size, max_threads, shared_memory_type, - output_shm_size, parser, factory, request_parameters), - execute_(true), max_concurrency_(max_concurrency) -{ - threads_config_.reserve(max_threads); -} - -void -ConcurrencyManager::InitManagerFinalize() -{ - if (on_sequence_model_) { - sequence_manager_->InitSequenceStatuses(max_concurrency_); - } -} - -cb::Error -ConcurrencyManager::ChangeConcurrencyLevel( - const size_t concurrent_request_count, const size_t request_count) -{ - PauseSequenceWorkers(); - ReconfigThreads(concurrent_request_count, request_count); - ResumeSequenceWorkers(); - - std::cout << "Request concurrency: " << concurrent_request_count << std::endl; - return cb::Error::Success; -} - -void -ConcurrencyManager::PauseSequenceWorkers() -{ - if (on_sequence_model_) { - execute_ = false; - // Wait to see all threads are paused. - for (auto& thread_config : threads_config_) { - while (!thread_config->is_paused_) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } - } - } -} - -void -ConcurrencyManager::ReconfigThreads( - size_t concurrent_request_count, size_t request_count) -{ - // Always prefer to create new threads if the maximum limit has not been met - // - // While operating in synchronous mode, each context can send only one - // request at a time, hence the number of worker threads should be equal to - // the requested concurrency levels. 
- // - while ((concurrent_request_count > threads_.size()) && - (threads_.size() < max_threads_)) { - // Launch new thread for inferencing - threads_stat_.emplace_back(new ThreadStat()); - threads_config_.emplace_back(new ThreadConfig(threads_config_.size())); - - workers_.push_back( - MakeWorker(threads_stat_.back(), threads_config_.back())); - - threads_.emplace_back(&IWorker::Infer, workers_.back()); - } - - { - // Make sure all threads are reconfigured before they are woken up - std::lock_guard lock(wake_mutex_); - - // Compute the new concurrency level for each thread (take floor) - // and spread the remaining value - size_t avg_concurrency = concurrent_request_count / threads_.size(); - size_t threads_add_one = concurrent_request_count % threads_.size(); - - size_t avg_req_count = request_count / threads_.size(); - size_t req_count_add_one = request_count % threads_.size(); - - size_t seq_stat_index_offset = 0; - active_threads_ = 0; - for (size_t i = 0; i < threads_stat_.size(); i++) { - size_t concurrency = avg_concurrency + (i < threads_add_one ? 1 : 0); - - threads_config_[i]->concurrency_ = concurrency; - threads_config_[i]->seq_stat_index_offset_ = seq_stat_index_offset; - - size_t thread_num_reqs = avg_req_count + (i < req_count_add_one ? 1 : 0); - threads_config_[i]->num_requests_ = thread_num_reqs; - - seq_stat_index_offset += concurrency; - - if (concurrency) { - active_threads_++; - } - } - - // TODO REFACTOR TMA-1043 the memory manager should have API to set - // num_active_threads in constructor, as well as overwrite it here - } -} - -void -ConcurrencyManager::ResumeSequenceWorkers() -{ - if (on_sequence_model_) { - execute_ = true; - } - - // Make sure all threads will check their updated concurrency level - wake_signal_.notify_all(); -} - -std::shared_ptr -ConcurrencyManager::MakeWorker( - std::shared_ptr thread_stat, - std::shared_ptr thread_config) -{ - uint32_t id = workers_.size(); - - return std::make_shared( - id, thread_stat, thread_config, parser_, data_loader_, factory_, - on_sequence_model_, async_, max_concurrency_, using_json_data_, - streaming_, batch_size_, wake_signal_, wake_mutex_, active_threads_, - execute_, infer_data_manager_, sequence_manager_); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/concurrency_manager.h b/src/c++/perf_analyzer/concurrency_manager.h deleted file mode 100644 index c6c90f1d1..000000000 --- a/src/c++/perf_analyzer/concurrency_manager.h +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
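ReconfigThreads above spreads the requested concurrency evenly: each worker receives the floor of total/threads, the first (total % threads) workers take one extra, and the per-thread request counts are split the same way. A small sketch of just that distribution step, assuming a plain vector of per-thread targets instead of the ThreadConfig objects used here:

#include <cstddef>
#include <iostream>
#include <vector>

// Split `total` units of concurrency across `num_threads` workers the same
// way the removed ReconfigThreads() does: floor division plus a remainder
// spread over the first few workers.
std::vector<size_t> SpreadConcurrency(size_t total, size_t num_threads)
{
  std::vector<size_t> per_thread(num_threads, 0);
  size_t avg = total / num_threads;
  size_t extra = total % num_threads;
  for (size_t i = 0; i < num_threads; ++i) {
    per_thread[i] = avg + (i < extra ? 1 : 0);
  }
  return per_thread;
}

int main()
{
  for (size_t c : SpreadConcurrency(10, 4)) {
    std::cout << c << " ";  // prints "3 3 2 2"
  }
  std::cout << std::endl;
}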
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "concurrency_worker.h" -#include "load_manager.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestConcurrencyManager; -#endif - -//============================================================================== -/// ConcurrencyManager is a helper class to send inference requests to inference -/// server consistently, based on the specified setting, so that the -/// perf_analyzer can measure performance under different concurrency. -/// -/// An instance of concurrency manager will be created at the beginning of the -/// perf_analyzer and it will be used to simulate different load level in -/// respect to number of concurrent infer requests and to collect per-request -/// statistic. -/// -/// Detail: -/// Concurrency Manager will maintain the number of concurrent requests by -/// spawning worker threads that keep sending randomly generated requests to the -/// server. The worker threads will record the start time and end -/// time of each request into a shared vector. -/// -class ConcurrencyManager : public LoadManager { - public: - ~ConcurrencyManager(); - - /// Create a concurrency manager that is responsible to maintain specified - /// load on inference server. - /// \param async Whether to use asynchronous or synchronous API for infer - /// request. - /// \param streaming Whether to use gRPC streaming API for infer request - /// \param batch_size The batch size used for each request. - /// \param max_threads The maximum number of working threads to be spawned. - /// \param max_concurrency The maximum concurrency which will be requested. - /// \param string_length The length of the string to create for input. - /// \param string_data The data to use for generating string input. - /// \param zero_input Whether to fill the input tensors with zero. - /// \param user_data The vector containing path/paths to user-provided data - /// that can be a directory or path to a json data file. - /// \param shared_memory_type The type of shared memory to use for inputs. - /// \param output_shm_size The size in bytes of the shared memory to - /// allocate for the output. - /// \param parser The ModelParser object to get the model details. - /// \param factory The ClientBackendFactory object used to create - /// client to the server. - /// \param manager Returns a new ConcurrencyManager object. - /// \param request_parameters Custom request parameters to send to the server - /// \return cb::Error object indicating success or failure. 
- static cb::Error Create( - const bool async, const bool streaming, const int32_t batch_size, - const size_t max_threads, const size_t max_concurrency, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - std::unique_ptr* manager, - const std::unordered_map& - request_parameters); - - /// Adjusts the number of concurrent requests to be the same as - /// 'concurrent_request_count' (by creating or pausing threads) - /// \param concurent_request_count The number of concurrent requests. - /// \param request_count The number of requests to generate. If 0, then - /// there is no limit, and it will generate until told to stop. - /// \return cb::Error object indicating success or failure. - cb::Error ChangeConcurrencyLevel( - const size_t concurrent_request_count, const size_t request_count = 0); - - protected: - // Makes a new worker - virtual std::shared_ptr MakeWorker( - std::shared_ptr, std::shared_ptr); - - ConcurrencyManager( - const bool async, const bool streaming, const int32_t batch_size, - const size_t max_threads, const size_t max_concurrency, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters); - - // The number of worker threads with non-zero concurrencies - size_t active_threads_; - - bool execute_; - - size_t max_concurrency_; - - std::vector> threads_config_; - - private: - void InitManagerFinalize() override; - - // Pause all worker threads that are working on sequences - // - void PauseSequenceWorkers(); - - // Create new threads (if necessary), and then reconfigure all worker threads - // to handle the new concurrent request count - // - void ReconfigThreads(size_t concurrent_request_count, size_t request_count); - - // Restart all worker threads that were working on sequences - // - void ResumeSequenceWorkers(); - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestConcurrencyManager; - - public: - ConcurrencyManager() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/concurrency_worker.cc b/src/c++/perf_analyzer/concurrency_worker.cc deleted file mode 100644 index 37a562f76..000000000 --- a/src/c++/perf_analyzer/concurrency_worker.cc +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
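ChangeConcurrencyLevel, declared above, leans on the pause/resume pattern shown earlier in concurrency_manager.cc: a shared execute flag guarded by wake_mutex_, with workers blocking on wake_signal_ until they are reconfigured and woken. A stripped-down, self-contained illustration of that pattern using only standard C++ threads, none of the perf_analyzer types:

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

// Minimal pause/resume skeleton: the manager flips `execute` under the lock
// and notifies; the worker blocks on the condition variable while paused.
int main()
{
  std::mutex wake_mutex;
  std::condition_variable wake_signal;
  bool execute = false;
  bool early_exit = false;

  std::thread worker([&]() {
    std::unique_lock<std::mutex> lock(wake_mutex);
    wake_signal.wait(lock, [&]() { return execute || early_exit; });
    std::cout << "worker resumed, sending requests" << std::endl;
  });

  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  {
    std::lock_guard<std::mutex> lock(wake_mutex);
    execute = true;  // reconfigure thread state before waking workers
  }
  wake_signal.notify_all();
  worker.join();
}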
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "concurrency_worker.h" - -#include - -#include "client_backend/client_backend.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -// Function for worker threads. -// If the model is non-sequence model, each worker uses only one context -// to maintain concurrency assigned to worker. -// If the model is sequence model, each worker has to use multiples contexts -// to maintain (sequence) concurrency assigned to worker. -void -ConcurrencyWorker::Infer() -{ - CreateCtxIdTracker(); - ReserveContexts(); - - // run inferencing until receiving exit signal to maintain server load. - do { - if (RunInference()) { - break; - } - } while (true); -} - -bool -ConcurrencyWorker::RunInference() -{ - HandleExecuteOff(); - if (HandleNoConcurrency()) { - return true; - } - CreateContextsAsNecessary(); - if (HandleExitConditions()) { - return true; - } - SendInferRequests(); - if (HandleExitConditions()) { - return true; - } - WaitForResponses(); - if (HandleExitConditions()) { - return true; - } - return false; -} - -void -ConcurrencyWorker::CreateCtxIdTracker() -{ - bool is_concurrency = true; - bool serial_sequences = false; - ctx_id_tracker_ = CtxIdTrackerFactory::CreateTracker( - is_concurrency, on_sequence_model_, serial_sequences); -} - -void -ConcurrencyWorker::ReserveContexts() -{ - // Reserve the vectors in case of sequence models. In non-sequence or - // synchronous mode only one context will be opened hence no need of - // reserving. 
- if (on_sequence_model_ && async_) { - thread_stat_->contexts_stat_.reserve(max_concurrency_); - ctxs_.reserve(max_concurrency_); - } -} - -void -ConcurrencyWorker::HandleExecuteOff() -{ - if (on_sequence_model_) { - if (!execute_) { - // Ensures the clean exit of the sequences - CompleteOngoingSequences(); - WaitForOngoingRequests(); - - // Reset Ctx IDs because CompleteOngoingSequences() - // has destructive side affects - ResetFreeCtxIds(); - - // Wait if no request should be sent and it is not exiting - thread_config_->is_paused_ = true; - std::unique_lock lock(wake_mutex_); - wake_signal_.wait(lock, [this]() { return early_exit || execute_; }); - - // TODO REFACTOR TMA-1043 - memory manager should be handling this instead - // of here - for (auto ctx : ctxs_) { - ctx->SetNumActiveThreads(active_threads_); - } - } - } - thread_config_->is_paused_ = false; -} - -bool -ConcurrencyWorker::HandleNoConcurrency() -{ - // Only interact with synchronous mechanism if the worker should wait - if (thread_config_->concurrency_ == 0) { - // Wait if no request should be sent and it is not exiting - std::unique_lock lock(wake_mutex_); - wake_signal_.wait(lock, [this]() { - return early_exit || (thread_config_->concurrency_ > 0); - }); - // Stop executing if concurrency is 0 and early exit is requested - if (early_exit && thread_config_->concurrency_ == 0) { - return true; - } - } - return false; -} - -void -ConcurrencyWorker::CreateContextsAsNecessary() -{ - // If the model is non-sequence model, use one InferContext to - // maintain concurrency for this thread. - size_t active_ctx_cnt = on_sequence_model_ ? thread_config_->concurrency_ : 1; - - if (active_ctx_cnt > ctxs_.size()) { - while (active_ctx_cnt > ctxs_.size()) { - CreateContext(); - } - ResetFreeCtxIds(); - } - - // TODO REFACTOR TMA-1043 -- this shouldn't be handled here - for (auto ctx : ctxs_) { - ctx->SetNumActiveThreads(active_threads_); - } -} - -void -ConcurrencyWorker::SendInferRequests() -{ - while (ctx_id_tracker_->IsAvailable() && execute_ && !ShouldExit()) { - uint32_t ctx_id = GetCtxId(); - SendInferRequest(ctx_id); - RestoreFreeCtxId(ctx_id); - } -} - - -void -ConcurrencyWorker::WaitForResponses() -{ - if (async_) { - { - // If async, then wait for signal from callback. - std::unique_lock lk(cb_mtx_); - thread_stat_->idle_timer.Start(); - cb_cv_.wait(lk, [this] { - if (notified_) { - notified_ = false; - return true; - } - return false; - }); - thread_stat_->idle_timer.Stop(); - } - } -} - -void -ConcurrencyWorker::ResetFreeCtxIds() -{ - std::lock_guard lock(cb_mtx_); - ctx_id_tracker_->Reset(thread_config_->concurrency_); -} - -uint32_t -ConcurrencyWorker::GetSeqStatIndex(uint32_t ctx_id) -{ - return (thread_config_->seq_stat_index_offset_ + ctx_id); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/concurrency_worker.h b/src/c++/perf_analyzer/concurrency_worker.h deleted file mode 100644 index 4645f07af..000000000 --- a/src/c++/perf_analyzer/concurrency_worker.h +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "load_worker.h" -#include "sequence_manager.h" -#include "thread_config.h" - -namespace triton { namespace perfanalyzer { - - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockConcurrencyWorker; -#endif - -/// Worker thread for the ConcurrencyManager -/// -/// The worker maintains concurrency in different ways: -/// For sequence models, multiple contexts must be created for multiple -/// concurrent sequences. -/// -/// For non-sequence models, one context can send out multiple requests -/// at the same time. Thus it uses one single context as every infer context -/// creates a worker thread implicitly. -/// -class ConcurrencyWorker : public LoadWorker { - public: - ConcurrencyWorker( - uint32_t id, std::shared_ptr thread_stat, - std::shared_ptr thread_config, - const std::shared_ptr parser, - std::shared_ptr data_loader, - const std::shared_ptr factory, - const bool on_sequence_model, const bool async, - const size_t max_concurrency, const bool using_json_data, - const bool streaming, const int32_t batch_size, - std::condition_variable& wake_signal, std::mutex& wake_mutex, - size_t& active_threads, bool& execute, - const std::shared_ptr& infer_data_manager, - std::shared_ptr sequence_manager) - : LoadWorker( - id, thread_stat, thread_config, parser, data_loader, factory, - on_sequence_model, async, streaming, batch_size, using_json_data, - wake_signal, wake_mutex, execute, infer_data_manager, - sequence_manager), - max_concurrency_(max_concurrency), active_threads_(active_threads) - { - } - - virtual void Infer() override; - - protected: - bool RunInference(); - - void CreateCtxIdTracker(); - - // Reserve vector size for contexts - void ReserveContexts(); - - private: - const size_t max_concurrency_; - // TODO REFACTOR TMA-1020 can we decouple this thread from the total count of - // threads? 
- size_t& active_threads_; - - // Handle the case where execute_ is false - void HandleExecuteOff(); - - // Handle the case where this thread is configured to do nothing - // Returns true if an exit condition was met - bool HandleNoConcurrency(); - - // Create and populate contexts if needed - void CreateContextsAsNecessary(); - - // Send out the desired concurrency of requests - void SendInferRequests(); - - void WaitForResponses(); - - void ResetFreeCtxIds(); - - uint32_t GetSeqStatIndex(uint32_t ctx_id) override; - - void CreateContextFinalize(std::shared_ptr ctx) override - { - ctx->RegisterAsyncCallbackFinalize(std::bind( - &ConcurrencyWorker::AsyncCallbackFinalize, this, - std::placeholders::_1)); - } - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockConcurrencyWorker; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/constants.h b/src/c++/perf_analyzer/constants.h deleted file mode 100644 index fbcd911b8..000000000 --- a/src/c++/perf_analyzer/constants.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include - -#define STRINGIFY_(x) #x -#define STRINGIFY(x) STRINGIFY_(x) -namespace triton { namespace perfanalyzer { - -const std::string SHA{STRINGIFY(GIT_SHA)}; -const std::string VERSION{STRINGIFY(PERF_ANALYZER_VERSION)}; - -constexpr static const uint32_t SUCCESS = 0; - -constexpr static const uint32_t STABILITY_ERROR = 2; -constexpr static const uint32_t OPTION_ERROR = 3; - -constexpr static const uint32_t GENERIC_ERROR = 99; -constexpr static const size_t DEFAULT_MAX_THREADS = 16; - -const double DELAY_PCT_THRESHOLD{1.0}; - -/// Different measurement modes possible. 
-enum MeasurementMode { TIME_WINDOWS = 0, COUNT_WINDOWS = 1 }; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/ctx_id_tracker_factory.h b/src/c++/perf_analyzer/ctx_id_tracker_factory.h deleted file mode 100644 index 0a455fc9c..000000000 --- a/src/c++/perf_analyzer/ctx_id_tracker_factory.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "concurrency_ctx_id_tracker.h" -#include "fifo_ctx_id_tracker.h" -#include "rand_ctx_id_tracker.h" - -namespace triton { namespace perfanalyzer { - -// Context ID tracker that is always available and returns random Context IDs -// -class CtxIdTrackerFactory { - public: - CtxIdTrackerFactory() = delete; - - /// Creates and returns a Context Id Tracker - /// - /// \param is_concurrency True if targeting Concurrency - /// \param is_sequence_model True if the model is a sequence model - /// \param serial_sequences True if in serial sequence mode - /// - static std::shared_ptr CreateTracker( - bool is_concurrency, bool is_sequence_model, bool serial_sequences) - { - if (is_concurrency) { - if (is_sequence_model) { - return std::make_shared(); - } else { - return std::make_shared(); - } - } else { - if (is_sequence_model && serial_sequences) { - return std::make_shared(); - } else { - return std::make_shared(); - } - } - } -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/custom_load_manager.cc b/src/c++/perf_analyzer/custom_load_manager.cc deleted file mode 100644 index 55a20a690..000000000 --- a/src/c++/perf_analyzer/custom_load_manager.cc +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "custom_load_manager.h" - -#include - -#include "constants.h" - -namespace triton { namespace perfanalyzer { - -cb::Error -CustomLoadManager::Create( - const bool async, const bool streaming, - const uint64_t measurement_window_ms, const size_t max_trials, - const std::string& request_intervals_file, const int32_t batch_size, - const size_t max_threads, const uint32_t num_of_sequences, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory, - std::unique_ptr* manager, - const std::unordered_map& - request_parameters) -{ - std::unique_ptr local_manager(new CustomLoadManager( - async, streaming, request_intervals_file, batch_size, - measurement_window_ms, max_trials, max_threads, num_of_sequences, - shared_memory_type, output_shm_size, serial_sequences, parser, factory, - request_parameters)); - - *manager = std::move(local_manager); - - return cb::Error::Success; -} - -CustomLoadManager::CustomLoadManager( - const bool async, const bool streaming, - const std::string& request_intervals_file, int32_t batch_size, - const uint64_t measurement_window_ms, const size_t max_trials, - const size_t max_threads, const uint32_t num_of_sequences, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters) - : RequestRateManager( - async, streaming, Distribution::CUSTOM, batch_size, - measurement_window_ms, max_trials, max_threads, num_of_sequences, - shared_memory_type, output_shm_size, serial_sequences, parser, - factory, request_parameters), - request_intervals_file_(request_intervals_file) -{ -} - -cb::Error -CustomLoadManager::InitCustomIntervals(const size_t request_count) -{ - PauseWorkers(); - ConfigureThreads(request_count); - auto status = GenerateSchedule(); - ResumeWorkers(); - return status; -} - -cb::Error -CustomLoadManager::GenerateSchedule() -{ - if (request_intervals_file_.empty()) { - return cb::Error::Success; - } - - RETURN_IF_ERROR( - ReadTimeIntervalsFile(request_intervals_file_, &custom_intervals_)); - - auto worker_schedules = CreateWorkerSchedules(); - GiveSchedulesToWorkers(worker_schedules); - return 
cb::Error::Success; -} - -std::vector -CustomLoadManager::CreateWorkerSchedules() -{ - std::vector worker_schedules = - CreateEmptyWorkerSchedules(); - std::vector thread_ids{CalculateThreadIds()}; - - size_t thread_id_index = 0; - size_t worker_index = 0; - size_t intervals_index = 0; - - std::chrono::nanoseconds next_timestamp(0); - - bool started = false; - - // Keep filling the schedule until both the thread_ids (which can differ if - // sequences are enabled) and the intervals are both at the end of their - // lists. This effectively finds the least common multiple of the two sizes - // and makes sure that the schedule is complete and can be repeated - // indefinitely - // - while (!started || thread_id_index != 0 || intervals_index != 0) { - started = true; - next_timestamp += custom_intervals_[intervals_index]; - worker_index = thread_ids[thread_id_index]; - worker_schedules[worker_index]->intervals.emplace_back(next_timestamp); - - thread_id_index = (thread_id_index + 1) % thread_ids.size(); - intervals_index = (intervals_index + 1) % custom_intervals_.size(); - } - - SetScheduleDurations(worker_schedules); - - return worker_schedules; -} - -cb::Error -CustomLoadManager::GetCustomRequestRate(double* request_rate) -{ - if (custom_intervals_.empty()) { - return cb::Error("The custom intervals vector is empty", pa::GENERIC_ERROR); - } - uint64_t total_time_ns = 0; - for (auto interval : custom_intervals_) { - total_time_ns += interval.count(); - } - - *request_rate = - (custom_intervals_.size() * NANOS_PER_SECOND) / (total_time_ns); - return cb::Error::Success; -} - -cb::Error -CustomLoadManager::ReadTimeIntervalsFile( - const std::string& path, NanoIntervals* contents) -{ - std::ifstream in(path); - if (!in) { - return cb::Error("failed to open file '" + path + "'", pa::GENERIC_ERROR); - } - - std::string current_string; - while (std::getline(in, current_string)) { - std::chrono::nanoseconds curent_time_interval_ns( - std::stol(current_string) * 1000); - contents->push_back(curent_time_interval_ns); - } - in.close(); - - if (contents->size() == 0) { - return cb::Error("file '" + path + "' is empty", pa::GENERIC_ERROR); - } - return cb::Error::Success; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/custom_load_manager.h b/src/c++/perf_analyzer/custom_load_manager.h deleted file mode 100644 index 39c51d99f..000000000 --- a/src/c++/perf_analyzer/custom_load_manager.h +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "request_rate_manager.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestCustomLoadManager; -#endif - -//============================================================================== -/// CustomLoadManager is a helper class to send inference requests to -/// inference server in accordance with user provided time intervals. This -/// load manager can be used to model certain patterns of interest. -/// -class CustomLoadManager : public RequestRateManager { - public: - ~CustomLoadManager() = default; - - /// Create an object of realistic load manager that is responsible to maintain - /// specified load on inference server. - /// \param async Whether to use asynchronous or synchronous API for infer - /// request. - /// \param streaming Whether to use gRPC streaming API for infer request - /// \param measurement_window_ms The time window for measurements. - /// \param max_trials The maximum number of windows that will be measured - /// \param request_intervals_file The path to the file to use to pick up the - /// time intervals between the successive requests. - /// \param batch_size The batch size used for each request. - /// \param max_threads The maximum number of working threads to be spawned. - /// \param num_of_sequences The number of concurrent sequences that must be - /// maintained on the server. - /// \param zero_input Whether to fill the input tensors with zero. - /// \param input_shapes The shape of the input tensors. - /// \param user_data The vector containing path/paths to user-provided data - /// that can be a directory or path to a json data file. - /// \param shared_memory_type The type of shared memory to use for inputs. - /// \param output_shm_size The size of the shared memory to allocate for the - /// output. - /// \param serial_sequences Enable serial sequence mode. - /// \param parser The ModelParser object to get the model details. - /// \param factory The ClientBackendFactory object used to create - /// client to the server. - /// \param manager Returns a new ConcurrencyManager object. - /// \param request_parameters Custom request parameters to send to the server - /// \return cb::Error object indicating success or failure. - static cb::Error Create( - const bool async, const bool streaming, - const uint64_t measurement_window_ms, const size_t max_trials, - const std::string& request_intervals_file, const int32_t batch_size, - const size_t max_threads, const uint32_t num_of_sequences, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory, - std::unique_ptr* manager, - const std::unordered_map& - request_parameter); - - /// Initializes the load manager with the provided file containing request - /// intervals - /// \param request_count The number of requests to generate. 
If 0, then - /// there is no limit, and it will generate until told to stop. - /// \return cb::Error object indicating success or failure. - cb::Error InitCustomIntervals(const size_t request_count); - - /// Computes the request rate from the time interval file. Fails with an error - /// if the file is not present or is empty. - /// \param request_rate Returns request rate as computed from the time - /// interval file. - /// \return cb::Error object indicating success or failure. - cb::Error GetCustomRequestRate(double* request_rate); - - private: - CustomLoadManager( - const bool async, const bool streaming, - const std::string& request_intervals_file, const int32_t batch_size, - const uint64_t measurement_window_ms, const size_t max_trials, - const size_t max_threads, const uint32_t num_of_sequences, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters); - - cb::Error GenerateSchedule(); - - std::vector CreateWorkerSchedules(); - - /// Reads the time intervals file and stores intervals in vector - /// \param path Filesystem path of the time intervals file. - /// \param contents Output intervals vector. - /// \return cb::Error object indicating success or failure. - virtual cb::Error ReadTimeIntervalsFile( - const std::string& path, NanoIntervals* contents); - - std::string request_intervals_file_; - NanoIntervals custom_intervals_; - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestCustomLoadManager; - - public: - CustomLoadManager() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/data_loader.cc b/src/c++/perf_analyzer/data_loader.cc deleted file mode 100644 index 38bfe9403..000000000 --- a/src/c++/perf_analyzer/data_loader.cc +++ /dev/null @@ -1,744 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "data_loader.h" - -#include -#include - -#include - -namespace triton { namespace perfanalyzer { - -DataLoader::DataLoader(const size_t batch_size) - : batch_size_(batch_size), data_stream_cnt_(0) -{ -} - -cb::Error -DataLoader::ValidateIOExistsInModel( - const std::shared_ptr& inputs, - const std::shared_ptr& outputs, - const std::string& data_directory) -{ - if (!std::filesystem::exists(data_directory) || - !std::filesystem::is_directory(data_directory)) { - return cb::Error( - "Error: Directory does not exist or is not a directory: " + - std::string(data_directory), - pa::GENERIC_ERROR); - } - - for (const auto& file : std::filesystem::directory_iterator(data_directory)) { - std::string io_name = file.path().filename().string(); - if (inputs->find(io_name) == inputs->end() && - outputs->find(io_name) == outputs->end()) { - return cb::Error( - "Provided data file '" + io_name + - "' does not correspond to a valid model input or output.", - pa::GENERIC_ERROR); - } - } - - return cb::Error::Success; -} - -cb::Error -DataLoader::ReadDataFromDir( - const std::shared_ptr& inputs, - const std::shared_ptr& outputs, - const std::string& data_directory) -{ - // Directory structure supports only a single data stream and step - data_stream_cnt_ = 1; - step_num_.push_back(1); - - for (const auto& input : *inputs) { - if (input.second.datatype_.compare("BYTES") != 0) { - const auto file_path = data_directory + "/" + input.second.name_; - std::string key_name( - input.second.name_ + "_" + std::to_string(0) + "_" + - std::to_string(0)); - auto it = input_data_.emplace(key_name, std::vector()).first; - RETURN_IF_ERROR(ReadFile(file_path, &it->second)); - int64_t byte_size = ByteSize(input.second.shape_, input.second.datatype_); - if (byte_size < 0) { - return cb::Error( - "input " + input.second.name_ + - " contains dynamic shape, provide shapes to send along with " - "the request", - pa::GENERIC_ERROR); - } - if (it->second.size() != byte_size) { - return cb::Error( - "provided data for input " + input.second.name_ + - " has byte size " + std::to_string(it->second.size()) + - ", expect " + std::to_string(byte_size), - pa::GENERIC_ERROR); - } - } else { - const auto file_path = data_directory + "/" + input.second.name_; - std::vector input_string_data; - RETURN_IF_ERROR(ReadTextFile(file_path, &input_string_data)); - std::string key_name( - input.second.name_ + "_" + std::to_string(0) + "_" + - std::to_string(0)); - auto it = input_data_.emplace(key_name, std::vector()).first; - SerializeStringTensor(input_string_data, &it->second); - int64_t batch1_num_strings = ElementCount(input.second.shape_); - if (batch1_num_strings == -1) { - return cb::Error( - "input " + input.second.name_ + - " contains dynamic shape, provide shapes to send along with " - "the request", - pa::GENERIC_ERROR); - } - if (input_string_data.size() != batch1_num_strings) { - return cb::Error( - "provided data for input " + input.second.name_ + " has " + - std::to_string(input_string_data.size()) + - " elements, expect " + std::to_string(batch1_num_strings), - pa::GENERIC_ERROR); - } - } - } - - for (const auto& output : *outputs) { - if (output.second.datatype_.compare("BYTES") != 0) { - const auto file_path = data_directory + "/" + output.second.name_; - std::string key_name( - output.second.name_ + "_" + std::to_string(0) + "_" + - std::to_string(0)); - auto it = output_data_.emplace(key_name, std::vector()).first; - if (!ReadFile(file_path, &it->second).IsOk()) { - output_data_.erase(it); - } - } else { - const auto 
file_path = data_directory + "/" + output.second.name_; - std::vector output_string_data; - if (!ReadTextFile(file_path, &output_string_data).IsOk()) { - continue; - } - std::string key_name( - output.second.name_ + "_" + std::to_string(0) + "_" + - std::to_string(0)); - auto it = output_data_.emplace(key_name, std::vector()).first; - SerializeStringTensor(output_string_data, &it->second); - } - } - return cb::Error::Success; -} - -cb::Error -DataLoader::ReadDataFromJSON( - const std::shared_ptr& inputs, - const std::shared_ptr& outputs, - const std::string& json_file) -{ - FILE* data_file = fopen(json_file.c_str(), "r"); - if (data_file == nullptr) { - return cb::Error( - "failed to open file for reading provided data", pa::GENERIC_ERROR); - } - - char readBuffer[65536]; - rapidjson::FileReadStream fs(data_file, readBuffer, sizeof(readBuffer)); - - rapidjson::Document d{}; - const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag; - d.ParseStream(fs); - - fclose(data_file); - - return ParseData(d, inputs, outputs); -} - -cb::Error -DataLoader::ParseData( - const rapidjson::Document& json, - const std::shared_ptr& inputs, - const std::shared_ptr& outputs) -{ - if (json.HasParseError()) { - std::cerr << "cb::Error : " << json.GetParseError() << '\n' - << "Offset : " << json.GetErrorOffset() << '\n'; - return cb::Error( - "failed to parse the specified json file for reading provided data", - pa::GENERIC_ERROR); - } - - if (!json.HasMember("data")) { - return cb::Error( - "The json file doesn't contain data field", pa::GENERIC_ERROR); - } - - const rapidjson::Value& streams = json["data"]; - - // Validation data is optional, once provided, it must align with 'data' - const rapidjson::Value* out_streams = nullptr; - if (json.HasMember("validation_data")) { - out_streams = &json["validation_data"]; - if (out_streams->Size() != streams.Size()) { - return cb::Error( - "The 'validation_data' field doesn't align with 'data' field in the " - "json file", - pa::GENERIC_ERROR); - } - } - - int count = streams.Size(); - - data_stream_cnt_ += count; - int offset = step_num_.size(); - for (size_t i = offset; i < data_stream_cnt_; i++) { - const rapidjson::Value& steps = streams[i - offset]; - const rapidjson::Value* output_steps = - (out_streams == nullptr) ? nullptr : &(*out_streams)[i - offset]; - - RETURN_IF_ERROR(ValidateParsingMode(steps)); - - if (steps.IsArray()) { - step_num_.push_back(steps.Size()); - for (size_t k = 0; k < step_num_[i]; k++) { - RETURN_IF_ERROR(ReadTensorData(steps[k], inputs, i, k, true)); - } - - if (output_steps != nullptr) { - if (!output_steps->IsArray() || - (output_steps->Size() != steps.Size())) { - return cb::Error( - "The 'validation_data' field doesn't align with 'data' field in " - "the json file", - pa::GENERIC_ERROR); - } - for (size_t k = 0; k < step_num_[i]; k++) { - RETURN_IF_ERROR( - ReadTensorData((*output_steps)[k], outputs, i, k, false)); - } - } - } else { - // There is no nesting of tensors, hence, will interpret streams as steps - // and add the tensors to a single stream '0'. 
- int offset = 0; - if (step_num_.empty()) { - step_num_.push_back(count); - } else { - offset = step_num_[0]; - step_num_[0] += (count); - } - data_stream_cnt_ = 1; - for (size_t k = offset; k < step_num_[0]; k++) { - RETURN_IF_ERROR( - ReadTensorData(streams[k - offset], inputs, 0, k, true)); - } - - if (out_streams != nullptr) { - for (size_t k = offset; k < step_num_[0]; k++) { - RETURN_IF_ERROR( - ReadTensorData((*out_streams)[k - offset], outputs, 0, k, false)); - } - } - break; - } - } - - - return cb::Error::Success; -} - -cb::Error -DataLoader::GenerateData( - std::shared_ptr inputs, const bool zero_input, - const size_t string_length, const std::string& string_data) -{ - // Data generation supports only a single data stream and step - // Not supported for inputs with dynamic shapes - data_stream_cnt_ = 1; - step_num_.push_back(1); - - // Validate the absence of shape tensors - for (const auto& input : *inputs) { - if (input.second.is_shape_tensor_) { - return cb::Error( - "can not generate data for shape tensor '" + input.second.name_ + - "', user-provided data is needed.", - pa::GENERIC_ERROR); - } - } - - uint64_t max_input_byte_size = 0; - for (const auto& input : *inputs) { - if (input.second.datatype_.compare("BYTES") != 0) { - int64_t byte_size = ByteSize(input.second.shape_, input.second.datatype_); - if (byte_size < 0) { - return cb::Error( - "input " + input.second.name_ + - " contains dynamic shape, provide shapes to send along with " - "the request", - pa::GENERIC_ERROR); - } - max_input_byte_size = std::max(max_input_byte_size, (size_t)byte_size); - } else { - // Generate string input and store it into map - std::vector input_string_data; - int64_t batch1_num_strings = ElementCount(input.second.shape_); - if (batch1_num_strings == -1) { - return cb::Error( - "input " + input.second.name_ + - " contains dynamic shape, provide shapes to send along with " - "the request", - pa::GENERIC_ERROR); - } - input_string_data.resize(batch1_num_strings); - if (!string_data.empty()) { - for (size_t i = 0; i < batch1_num_strings; i++) { - input_string_data[i] = string_data; - } - } else { - for (size_t i = 0; i < batch1_num_strings; i++) { - input_string_data[i] = GetRandomString(string_length); - } - } - - std::string key_name( - input.second.name_ + "_" + std::to_string(0) + "_" + - std::to_string(0)); - auto it = input_data_.emplace(key_name, std::vector()).first; - SerializeStringTensor(input_string_data, &it->second); - } - } - - // Create a zero or randomly (as indicated by zero_input) - // initialized buffer that is large enough to provide the largest - // needed input. We (re)use this buffer for all non-string input values. 
- if (max_input_byte_size > 0) { - if (zero_input) { - input_buf_.resize(max_input_byte_size, 0); - } else { - input_buf_.resize(max_input_byte_size); - for (auto& byte : input_buf_) { - byte = rand(); - } - } - } - - return cb::Error::Success; -} - -cb::Error -DataLoader::GetInputData( - const ModelTensor& input, const int stream_id, const int step_id, - TensorData& data) -{ - data.data_ptr = nullptr; - data.batch1_size = 0; - data.is_valid = false; - - // If json data is available then try to retrieve the data from there - if (!input_data_.empty()) { - RETURN_IF_ERROR(ValidateIndexes(stream_id, step_id)); - - std::string key_name( - input.name_ + "_" + std::to_string(stream_id) + "_" + - std::to_string(step_id)); - - // Get the data and the corresponding byte-size - auto it = input_data_.find(key_name); - if (it != input_data_.end()) { - std::vector* data_vec = &it->second; - data.is_valid = true; - data.batch1_size = data_vec->size(); - data.data_ptr = (const uint8_t*)data_vec->data(); - } - } - - if (!data.is_valid) { - if ((input.datatype_.compare("BYTES") != 0) && (input_buf_.size() != 0)) { - int64_t byte_size = ByteSize(input.shape_, input.datatype_); - if (byte_size < 0) { - return cb::Error( - "failed to get correct byte size for '" + input.name_ + "'.", - pa::GENERIC_ERROR); - } - data.batch1_size = (size_t)byte_size; - data.data_ptr = &input_buf_[0]; - data.is_valid = true; - } - } - - if (input.is_optional_ == false && !data.is_valid) { - return cb::Error( - "unable to find data for input '" + input.name_ + "'.", - pa::GENERIC_ERROR); - } - - return cb::Error::Success; -} - -cb::Error -DataLoader::GetOutputData( - const std::string& output_name, const int stream_id, const int step_id, - TensorData& data) -{ - data.data_ptr = nullptr; - data.batch1_size = 0; - data.is_valid = false; - data.name = ""; - - // If json data is available then try to retrieve the data from there - if (!output_data_.empty()) { - RETURN_IF_ERROR(ValidateIndexes(stream_id, step_id)); - - std::string key_name( - output_name + "_" + std::to_string(stream_id) + "_" + - std::to_string(step_id)); - // Get the data and the corresponding byte-size - auto it = output_data_.find(key_name); - if (it != output_data_.end()) { - std::vector* data_vec = &it->second; - data.is_valid = true; - data.batch1_size = data_vec->size(); - data.data_ptr = (const uint8_t*)data_vec->data(); - data.name = output_name; - } - } - return cb::Error::Success; -} - -cb::Error -DataLoader::ValidateIndexes(int stream_id, int step_id) -{ - if (stream_id < 0 || stream_id >= (int)data_stream_cnt_) { - return cb::Error( - "stream_id for retrieving the data should be less than " + - std::to_string(data_stream_cnt_) + ", got " + - std::to_string(stream_id), - pa::GENERIC_ERROR); - } - if (step_id < 0 || step_id >= (int)step_num_[stream_id]) { - return cb::Error( - "step_id for retrieving the data should be less than " + - std::to_string(step_num_[stream_id]) + ", got " + - std::to_string(step_id), - pa::GENERIC_ERROR); - } - return cb::Error::Success; -} - - -cb::Error -DataLoader::GetInputShape( - const ModelTensor& input, const int stream_id, const int step_id, - std::vector* provided_shape) -{ - std::string key_name( - input.name_ + "_" + std::to_string(stream_id) + "_" + - std::to_string(step_id)); - - provided_shape->clear(); - - // Prefer the values read from file over the ones provided from - // CLI - auto it = input_shapes_.find(key_name); - if (it != input_shapes_.end()) { - *provided_shape = it->second; - } else { - *provided_shape = 
input.shape_; - } - return cb::Error::Success; -} - -cb::Error -DataLoader::ReadTensorData( - const rapidjson::Value& step, - const std::shared_ptr& tensors, const int stream_index, - const int step_index, const bool is_input) -{ - std::unordered_set model_io_names; - auto& tensor_data = is_input ? input_data_ : output_data_; - auto& tensor_shape = is_input ? input_shapes_ : output_shapes_; - for (const auto& io : *tensors) { - model_io_names.insert(io.first); - if (step.HasMember(io.first.c_str())) { - std::string key_name( - io.first + "_" + std::to_string(stream_index) + "_" + - std::to_string(step_index)); - - auto it = tensor_data.emplace(key_name, std::vector()).first; - - const rapidjson::Value& tensor = step[(io.first).c_str()]; - - const rapidjson::Value* content; - - // Check if the input data file is malformed - if (!(tensor.IsArray() || tensor.IsObject())) { - return cb::Error("Input data file is malformed.", pa::GENERIC_ERROR); - } - - if (tensor.IsArray()) { - content = &tensor; - } else { - // Populate the shape values first if available - if (tensor.HasMember("shape")) { - auto shape_it = - tensor_shape.emplace(key_name, std::vector()).first; - for (const auto& value : tensor["shape"].GetArray()) { - if (!value.IsInt()) { - return cb::Error( - "shape values must be integers.", pa::GENERIC_ERROR); - } - shape_it->second.push_back(value.GetInt()); - } - } - - if (tensor.HasMember("b64")) { - content = &tensor; - } else { - if (!tensor.HasMember("content")) { - return cb::Error( - "missing content field. ( Location stream id: " + - std::to_string(stream_index) + - ", step id: " + std::to_string(step_index) + ")", - pa::GENERIC_ERROR); - } - - content = &tensor["content"]; - } - } - - if (content->IsArray()) { - RETURN_IF_ERROR(SerializeExplicitTensor( - *content, io.second.datatype_, &it->second)); - } else { - if (content->IsObject() && content->HasMember("b64")) { - if ((*content)["b64"].IsString()) { - const std::string& encoded = (*content)["b64"].GetString(); - it->second.resize(encoded.length()); - base64::decoder D; - int size = - D.decode(encoded.c_str(), encoded.length(), &it->second[0]); - it->second.resize(size); - } else { - return cb::Error( - "the value of b64 field should be of type string ( " - "Location stream id: " + - std::to_string(stream_index) + - ", step id: " + std::to_string(step_index) + ")", - pa::GENERIC_ERROR); - } - } else { - return cb::Error( - "The tensor values are not supported. 
Expected an array or " - "b64 string ( Location stream id: " + - std::to_string(stream_index) + - ", step id: " + std::to_string(step_index) + ")", - pa::GENERIC_ERROR); - } - } - - RETURN_IF_ERROR(ValidateTensor(io.second, stream_index, step_index)); - - } else if (io.second.is_optional_ == false) { - return cb::Error( - "missing tensor " + io.first + - " ( Location stream id: " + std::to_string(stream_index) + - ", step id: " + std::to_string(step_index) + ")", - pa::GENERIC_ERROR); - } - } - - // Add allowed non-model inputs/outputs to the model_io_names set - model_io_names.insert("model"); - - for (auto itr = step.MemberBegin(); itr != step.MemberEnd(); ++itr) { - if (model_io_names.find(itr->name.GetString()) == model_io_names.end()) { - return cb::Error( - "The input or output '" + std::string(itr->name.GetString()) + - "' is not found in the model configuration", - pa::GENERIC_ERROR); - } - } - - - return cb::Error::Success; -} - - -cb::Error -DataLoader::ReadFile(const std::string& path, std::vector* contents) -{ - std::ifstream in(path, std::ios::in | std::ios::binary); - if (!in) { - return cb::Error("failed to open file '" + path + "'", pa::GENERIC_ERROR); - } - - in.seekg(0, std::ios::end); - - int file_size = in.tellg(); - if (file_size > 0) { - contents->resize(file_size); - in.seekg(0, std::ios::beg); - in.read(&(*contents)[0], contents->size()); - } - - in.close(); - - // If size is invalid, report after ifstream is closed - if (file_size < 0) { - return cb::Error( - "failed to get size for file '" + path + "'", pa::GENERIC_ERROR); - } else if (file_size == 0) { - return cb::Error("file '" + path + "' is empty", pa::GENERIC_ERROR); - } - - return cb::Error::Success; -} - -cb::Error -DataLoader::ReadTextFile( - const std::string& path, std::vector* contents) -{ - std::ifstream in(path); - if (!in) { - return cb::Error("failed to open file '" + path + "'", pa::GENERIC_ERROR); - } - - std::string current_string; - while (std::getline(in, current_string)) { - contents->push_back(current_string); - } - in.close(); - - if (contents->size() == 0) { - return cb::Error("file '" + path + "' is empty", pa::GENERIC_ERROR); - } - return cb::Error::Success; -} - -cb::Error -DataLoader::ValidateTensor( - const ModelTensor& model_tensor, const int stream_index, - const int step_index) -{ - std::string key_name( - model_tensor.name_ + "_" + std::to_string(stream_index) + "_" + - std::to_string(step_index)); - - auto data_it = input_data_.find(key_name); - if (data_it == input_data_.end()) { - data_it = output_data_.find(key_name); - } - if (data_it == output_data_.end()) { - return cb::Error("Can't validate a nonexistent tensor"); - } - - auto shape_it = input_shapes_.find(key_name); - - const std::vector& data = data_it->second; - const std::vector& shape = (shape_it == input_shapes_.end()) - ? model_tensor.shape_ - : shape_it->second; - - int64_t batch1_byte = ByteSize(shape, model_tensor.datatype_); - - RETURN_IF_ERROR(ValidateTensorShape(shape, model_tensor)); - RETURN_IF_ERROR(ValidateTensorDataSize(data, batch1_byte, model_tensor)); - - return cb::Error::Success; -} - -cb::Error -DataLoader::ValidateTensorShape( - const std::vector& shape, const ModelTensor& model_tensor) -{ - int element_count = ElementCount(shape); - if (element_count < 0) { - return cb::Error( - "The variable-sized tensor \"" + model_tensor.name_ + - "\" with model shape " + ShapeVecToString(model_tensor.shape_) + - " needs to have its shape fully defined. 
See the --shape option.", - pa::GENERIC_ERROR); - } - - bool is_error = false; - - if (shape.size() != model_tensor.shape_.size()) { - is_error = true; - } - - for (size_t i = 0; i < shape.size() && !is_error; i++) { - if (shape[i] != model_tensor.shape_[i] && model_tensor.shape_[i] != -1) { - is_error = true; - } - } - - if (is_error) { - return cb::Error( - "The supplied shape of " + ShapeVecToString(shape) + " for input \"" + - model_tensor.name_ + - "\" is incompatible with the model's input shape of " + - ShapeVecToString(model_tensor.shape_)); - } - - return cb::Error::Success; -} - -cb::Error -DataLoader::ValidateTensorDataSize( - const std::vector& data, int64_t batch1_byte, - const ModelTensor& model_tensor) -{ - // Validate that the supplied data matches the amount of data expected based - // on the shape - if (batch1_byte > 0 && (size_t)batch1_byte != data.size()) { - return cb::Error( - "mismatch in the data provided for " + model_tensor.name_ + - ". Expected: " + std::to_string(batch1_byte) + - " bytes, Got: " + std::to_string(data.size()) + " bytes", - pa::GENERIC_ERROR); - } - - return cb::Error::Success; -} - -cb::Error -DataLoader::ValidateParsingMode(const rapidjson::Value& steps) -{ - // If our first time parsing data, capture the mode - if (step_num_.size() == 0) { - multiple_stream_mode_ = steps.IsArray(); - } else { - if (steps.IsArray() != multiple_stream_mode_) { - return cb::Error( - "Inconsistency in input-data provided. Can not have a combination of " - "objects and arrays inside of the Data array", - pa::GENERIC_ERROR); - } - } - return cb::Error::Success; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/data_loader.h b/src/c++/perf_analyzer/data_loader.h deleted file mode 100644 index 2f83f959f..000000000 --- a/src/c++/perf_analyzer/data_loader.h +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include -#include -#include - -#include "model_parser.h" -#include "perf_utils.h" -#include "tensor_data.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockDataLoader; -#endif - - -class DataLoader { - public: - DataLoader(size_t batch_size); - - /// Returns the total number of data streams available. - size_t GetDataStreamsCount() { return data_stream_cnt_; } - - /// Returns the total data steps supported for a requested data stream - /// id. - /// \param stream_id The target stream id - virtual size_t GetTotalSteps(size_t stream_id) - { - if (stream_id < data_stream_cnt_) { - return step_num_[stream_id]; - } - return 0; - } - - /// Validate user-supplied inputs and outputs exist in the model - /// \param inputs The pointer to the map holding the information about - /// input tensors of a model - /// \param outputs The pointer to the map holding the information about - /// output tensors of a model - /// \param data_directory The path to the directory containing the data - cb::Error ValidateIOExistsInModel( - const std::shared_ptr& inputs, - const std::shared_ptr& outputs, - const std::string& data_directory); - - /// Reads the input data from the specified data directory. - /// \param inputs The pointer to the map holding the information about - /// input tensors of a model - /// \param outputs The pointer to the map holding the information about - /// output tensors of a model - /// \param data_directory The path to the directory containing the data - cb::Error ReadDataFromDir( - const std::shared_ptr& inputs, - const std::shared_ptr& outputs, - const std::string& data_directory); - - /// Reads the input data from the specified json file. - /// \param inputs The pointer to the map holding the information about - /// input tensors of a model - /// \param json_file The json file containing the user-provided input - /// data. - /// Returns error object indicating status - virtual cb::Error ReadDataFromJSON( - const std::shared_ptr& inputs, - const std::shared_ptr& outputs, - const std::string& json_file); - - /// Generates the input data to use with the inference requests - /// \param inputs The pointer to the map holding the information about - /// input tensors of a model - /// \param zero_input Whether or not to use zero value for buffer - /// initialization. - /// \param string_length The length of the string to generate for - /// tensor inputs. - /// \param string_data The user provided string to use to populate - /// string tensors - /// Returns error object indicating status - cb::Error GenerateData( - std::shared_ptr inputs, const bool zero_input, - const size_t string_length, const std::string& string_data); - - /// Helper function to access data for the specified input - /// \param input The target model input tensor - /// \param stream_id The data stream_id to use for retrieving input data. - /// \param step_id The data step_id to use for retrieving input data. - /// \param data Returns the input TensorData - /// Returns error object indicating status - cb::Error GetInputData( - const ModelTensor& input, const int stream_id, const int step_id, - TensorData& data); - - /// Helper function to get the shape values to the input - /// \param input The target model input tensor - /// \param stream_id The data stream_id to use for retrieving input shape. - /// \param step_id The data step_id to use for retrieving input shape. - /// \param shape returns the pointer to the vector containing the shape - /// values. 
- /// Returns error object indicating status - cb::Error GetInputShape( - const ModelTensor& input, const int stream_id, const int step_id, - std::vector* shape); - - /// Helper function to access data for the specified output. nullptr will be - /// returned if there is no data specified. - /// \param output_name The name of the output tensor - /// \param stream_id The data stream_id to use for retrieving output data. - /// \param step_id The data step_id to use for retrieving output data. - /// \param data Returns the output TensorData - /// Returns error object indicating status - cb::Error GetOutputData( - const std::string& output_name, const int stream_id, const int step_id, - TensorData& data); - - /// Return an error if the stream index or step index are invalid - cb::Error ValidateIndexes(int stream_index, int step_index); - - protected: - /// Parses the input and output data from the json document - /// \param inputs The input tensors of a model - /// \param outputs The output tensors of a model - /// \param json The json document containing the raw json inputs/outputs - /// \return Returns error object indicating status - cb::Error ParseData( - const rapidjson::Document& json, - const std::shared_ptr& inputs, - const std::shared_ptr& outputs); - - private: - /// Reads the data from file specified by path into vector of characters - /// \param path The complete path to the file to be read - /// \param contents The character vector that will contain the data read - /// \return error status. Returns Non-Ok if an error is encountered during - /// read operation. - virtual cb::Error ReadFile( - const std::string& path, std::vector* contents); - - /// Reads the string from file specified by path into vector of strings - /// \param path The complete path to the file to be read - /// \param contents The string vector that will contain the data read - /// \return error status. Returns Non-Ok if an error is encountered during - /// read operation. - virtual cb::Error ReadTextFile( - const std::string& path, std::vector* contents); - - /// Helper function to read data for the specified input from json - /// \param step the DOM for current step - /// \param inputs The pointer to the map holding the information about - /// input tensors of a model - /// \param stream_index the stream index the data should be exported to. - /// \param step_index the step index the data should be exported to. - /// Returns error object indicating status - cb::Error ReadTensorData( - const rapidjson::Value& step, - const std::shared_ptr& tensors, const int stream_index, - const int step_index, const bool is_input); - - /// Helper function to validate the provided data and shape for the tensor - /// \param input The target model input or output tensor - /// \param stream_index the stream index the data should be exported to. - /// \param step_index the step index the data should be exported to. 
- /// Returns error object indicating status - cb::Error ValidateTensor( - const ModelTensor& model_tensor, const int stream_index, - const int step_index); - - /// Helper function to validate the provided shape for a tensor - /// \param shape Shape for the tensor - /// \param model_tensor The tensor to validate - /// Returns error object indicating status - cb::Error ValidateTensorShape( - const std::vector& shape, const ModelTensor& model_tensor); - - /// Helper function to validate the provided data's size - /// \param data The provided data for the tensor - /// \param batch1_byte The expected number of bytes of data - /// \param model_tensor The tensor to validate - /// Returns error object indicating status - cb::Error ValidateTensorDataSize( - const std::vector& data, int64_t batch1_byte, - const ModelTensor& model_tensor); - - /// Helper function to validate consistency of parsing mode for provided input - /// data. The code explicitly does not support a mixture of objects (multiple - /// entries of a single stream) and arrays (multiple streams) - /// - /// \param steps The json data provided for one or multiple streams - cb::Error ValidateParsingMode(const rapidjson::Value& steps); - - // The batch_size_ for the data - size_t batch_size_{1}; - // The total number of data streams available. - size_t data_stream_cnt_{0}; - // A vector containing the supported step number for respective stream - // ids. - std::vector step_num_; - - // User provided input data, it will be preferred over synthetic data - std::unordered_map> input_data_; - std::unordered_map> input_shapes_; - - // User provided output data for validation - std::unordered_map> output_data_; - std::unordered_map> output_shapes_; - - // Placeholder for generated input data, which will be used for all inputs - // except string - std::vector input_buf_; - - // Tracks what type of input data has been provided - bool multiple_stream_mode_ = false; - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockDataLoader; - - public: - DataLoader() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/docs/README.md b/src/c++/perf_analyzer/docs/README.md deleted file mode 100644 index 34f33475a..000000000 --- a/src/c++/perf_analyzer/docs/README.md +++ /dev/null @@ -1,55 +0,0 @@ - - -# Perf Analyzer Documentation - -| [Installation](README.md#installation) | [Getting Started](README.md#getting-started) | [User Guide](README.md#user-guide) | -| -------------------------------------- | -------------------------------------------- | ---------------------------------- | - -## **Installation** - -See the [Installation Guide](install.md) for details on how to install Perf -Analyzer. - -## **Getting Started** - -The [Quick Start Guide](quick_start.md) will show you how to use Perf -Analyzer to profile a simple PyTorch model. - -## **User Guide** - -The User Guide describes the Perf Analyzer command line options, how to specify -model input data, the performance measurement modes, the performance metrics and -outputs, how to benchmark different servers, and more. 
- -- [Perf Analyzer CLI](cli.md) -- [Inference Load Modes](inference_load_modes.md) -- [Input Data](input_data.md) -- [Measurements & Metrics](measurements_metrics.md) -- [Benchmarking](benchmarking.md) -- [Large Language Models (LLMs)](../genai-perf/README.md) diff --git a/src/c++/perf_analyzer/docs/benchmarking.md b/src/c++/perf_analyzer/docs/benchmarking.md deleted file mode 100644 index 96f1ad3a8..000000000 --- a/src/c++/perf_analyzer/docs/benchmarking.md +++ /dev/null @@ -1,250 +0,0 @@ - - -# Benchmarking Triton via HTTP or gRPC endpoint - -This is the default mode for Perf Analyzer. - -# Benchmarking Triton directly via C API - -Besides using HTTP or gRPC server endpoints to communicate with Triton, Perf -Analyzer also allows users to benchmark Triton directly using the C API. HTTP -and gRPC endpoints introduce an additional latency in the pipeline which may not -be of interest to users who are using Triton via C API within their application. -Specifically, this feature is useful to benchmark a bare minimum Triton without -additional overheads from HTTP/gRPC communication. - -## Prerequisite - -Pull the Triton SDK and the Triton Server container images on target machine. -Since you will need access to the `tritonserver` install, it might be easier if -you copy the `perf_analyzer` binary to the Inference Server container. - -## Required parameters - -Use the [`--help`](cli.md#--help) option to see a complete list of supported -command line arguments. By default, Perf Analyzer expects the Triton instance to -already be running. You can configure C API mode using the -[`--service-kind`](cli.md#--service-kindtritontriton_c_apitfservingtorchserve) -option. In addition, you will need to point Perf Analyzer to the Triton server -library path using the -[`--triton-server-directory`](cli.md#--triton-server-directorypath) option and -the model repository path using the -[`--model-repository`](cli.md#--model-repositorypath) option. - -An example run would look like: - -``` -$ perf_analyzer -m my_model --service-kind=triton_c_api --triton-server-directory=/opt/tritonserver --model-repository=/my/model/repository -... -*** Measurement Settings *** - Service Kind: Triton C-API - Using "time_windows" mode for stabilization - Measurement window: 5000 msec - Using synchronous calls for inference - Stabilizing using average latency - -Request concurrency: 1 - Client: - Request count: 353 - Throughput: 19.6095 infer/sec - Avg latency: 50951 usec (standard deviation 2265 usec) - p50 latency: 50833 usec - p90 latency: 50923 usec - p95 latency: 50940 usec - p99 latency: 50985 usec - - Server: - Inference count: 353 - Execution count: 353 - Successful request count: 353 - Avg request latency: 50841 usec (overhead 20 usec + queue 63 usec + compute input 35 usec + compute infer 50663 usec + compute output 59 usec) - -Inferences/Second vs. Client Average Batch Latency -Concurrency: 1, throughput: 19.6095 infer/sec, latency 50951 usec -``` - -## Non-supported functionalities - -There are a few functionalities that are missing from C API mode. They are: - -1. Async mode ([`--async`](cli.md#--async)) -2. 
For additional known non-working cases, please refer to - [qa/L0_perf_analyzer_capi/test.sh](https://github.com/triton-inference-server/server/blob/main/qa/L0_perf_analyzer_capi/test.sh#L239-L277) - -# Benchmarking TensorFlow Serving - -Perf Analyzer can also be used to benchmark models deployed on -[TensorFlow Serving](https://github.com/tensorflow/serving) using the -[`--service-kind=tfserving`](cli.md#--service-kindtritontriton_c_apitfservingtorchserve) -option. Only gRPC protocol is supported. - -The following invocation demonstrates how to configure Perf Analyzer to issue -requests to a running instance of `tensorflow_model_server`: - -``` -$ perf_analyzer -m resnet50 --service-kind tfserving -i grpc -b 1 -p 5000 -u localhost:8500 -*** Measurement Settings *** - Batch size: 1 - Using "time_windows" mode for stabilization - Measurement window: 5000 msec - Using synchronous calls for inference - Stabilizing using average latency -Request concurrency: 1 - Client: - Request count: 829 - Throughput: 165.8 infer/sec - Avg latency: 6032 usec (standard deviation 569 usec) - p50 latency: 5863 usec - p90 latency: 6655 usec - p95 latency: 6974 usec - p99 latency: 8093 usec - Avg gRPC time: 5984 usec ((un)marshal request/response 257 usec + response wait 5727 usec) -Inferences/Second vs. Client Average Batch Latency -Concurrency: 1, throughput: 165.8 infer/sec, latency 6032 usec -``` - -You might have to specify a different url ([`-u`](cli.md#-u-url)) to access -wherever the server is running. The report of Perf Analyzer will only include -statistics measured at the client-side. - -**NOTE:** The support is still in **beta**. Perf Analyzer does not guarantee -optimal tuning for TensorFlow Serving. However, a single benchmarking tool that -can be used to stress the inference servers in an identical manner is important -for performance analysis. - -The following points are important for interpreting the results: - -1. `Concurrent Request Execution`: - TensorFlow Serving (TFS), as of version 2.8.0, by default creates threads for - each request that individually submits requests to TensorFlow Session. There - is a resource limit on the number of concurrent threads serving requests. - When benchmarking at a higher request concurrency, you can see higher - throughput because of this. Unlike TFS, by default Triton is configured with - only a single - [instance count](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#instance-groups). - Hence, at a higher request concurrency, most of the requests are blocked on - the instance availability. To configure Triton to behave like TFS, set the - instance count to a reasonably high value and then set - [MAX_SESSION_SHARE_COUNT](https://github.com/triton-inference-server/tensorflow_backend#parameters) - parameter in the model `config.pbtxt` to the same value. For some context, - the TFS sets its thread constraint to four times the num of schedulable CPUs. -2. `Different library versions`: - The version of TensorFlow might differ between Triton and TensorFlow Serving - being benchmarked. Even the versions of CUDA libraries might differ between - the two solutions. The performance of models can be susceptible to the - versions of these libraries. 
For a single request concurrency, if the - `compute_infer` time reported by Perf Analyzer when benchmarking Triton is as - large as the latency reported by Perf Analyzer when benchmarking TFS, then - the performance difference is likely because of the difference in the - software stack and outside the scope of Triton. -3. `CPU Optimization`: - TFS has separate builds for CPU and GPU targets. They have target-specific - optimization. Unlike TFS, Triton has a single build which is optimized for - execution on GPUs. When collecting performance on CPU models on Triton, try - running Triton with the environment variable `TF_ENABLE_ONEDNN_OPTS=1`. - -# Benchmarking TorchServe - -Perf Analyzer can also be used to benchmark -[TorchServe](https://github.com/pytorch/serve) using the -[`--service-kind=torchserve`](cli.md#--service-kindtritontriton_c_apitfservingtorchserve) -option. Only HTTP protocol is supported. It also requires input to be provided -via JSON file. - -The following invocation demonstrates how to configure Perf Analyzer to issue -requests to a running instance of `torchserve` assuming the location holds -`kitten_small.jpg`: - -``` -$ perf_analyzer -m resnet50 --service-kind torchserve -i http -u localhost:8080 -b 1 -p 5000 --input-data data.json - Successfully read data for 1 stream/streams with 1 step/steps. -*** Measurement Settings *** - Batch size: 1 - Using "time_windows" mode for stabilization - Measurement window: 5000 msec - Using synchronous calls for inference - Stabilizing using average latency -Request concurrency: 1 - Client: - Request count: 799 - Throughput: 159.8 infer/sec - Avg latency: 6259 usec (standard deviation 397 usec) - p50 latency: 6305 usec - p90 latency: 6448 usec - p95 latency: 6494 usec - p99 latency: 7158 usec - Avg HTTP time: 6272 usec (send/recv 77 usec + response wait 6195 usec) -Inferences/Second vs. Client Average Batch Latency -Concurrency: 1, throughput: 159.8 infer/sec, latency 6259 usec -``` - -The content of `data.json`: - -```json - { - "data" : - [ - { - "TORCHSERVE_INPUT" : ["kitten_small.jpg"] - } - ] - } -``` - -You might have to specify a different url ([`-u`](cli.md#-u-url)) to access -wherever the server is running. The report of Perf Analyzer will only include -statistics measured at the client-side. - -**NOTE:** The support is still in **beta**. Perf Analyzer does not guarantee -optimal tuning for TorchServe. However, a single benchmarking tool that can be -used to stress the inference servers in an identical manner is important for -performance analysis. - -# Advantages of using Perf Analyzer over third-party benchmark suites - -Triton Inference Server offers the entire serving solution which includes -[client libraries](https://github.com/triton-inference-server/client) that are -optimized for Triton. Using third-party benchmark suites like `jmeter` fails to -take advantage of the optimized libraries. Some of these optimizations includes -but are not limited to: - -1. Using - [binary tensor data extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_binary_data.md#binary-tensor-data-extension) - with HTTP requests. -2. Effective re-use of gRPC message allocation in subsequent requests. -3. Avoiding extra memory copy via libcurl interface. - -These optimizations can have a tremendous impact on overall performance. Using -Perf Analyzer for benchmarking directly allows a user to access these -optimizations in their study. 
- -Not only that, Perf Analyzer is also very customizable and supports many Triton -features as described in this document. This, along with a detailed report, -allows a user to identify performance bottlenecks and experiment with different -features before deciding upon what works best for them. diff --git a/src/c++/perf_analyzer/docs/cli.md b/src/c++/perf_analyzer/docs/cli.md deleted file mode 100644 index bd82415c8..000000000 --- a/src/c++/perf_analyzer/docs/cli.md +++ /dev/null @@ -1,663 +0,0 @@ - - -# Perf Analyzer CLI - -This document details the Perf Analyzer command line interface: - -- [General Options](#general-options) -- [Measurement Options](#measurement-options) -- [Sequence Model Options](#sequence-model-options) -- [Input Data Options](#input-data-options) -- [Request Options](#request-options) -- [Server Options](#server-options) -- [Prometheus Metrics Options](#prometheus-metrics-options) -- [Report Options](#report-options) -- [Trace Options](#trace-options) -- [Deprecated Options](#deprecated-options) - -## General Options - -#### `-?` -#### `-h` -#### `--help` - -Prints a description of the Perf Analyzer command line interface. - -#### `-m ` - -Specifies the model name for Perf Analyzer to run. - -This is a required option. - -#### `-x ` - -Specifies the version of the model to be used. If not specified the most -recent version (the highest numbered version) of the model will be used. - -#### `--service-kind=[triton|triton_c_api|tfserving|torchserve]` - -Specifies the kind of service for Perf Analyzer to generate load for. Note: in -order to use `torchserve` backend, the `--input-data` option must point to a -JSON file holding data in the following format: - -``` -{ - "data": [ - { - "TORCHSERVE_INPUT": [ - "" - ] - }, - {...}, - ... - ] -} -``` - -The type of file here will depend on the model. In order to use `triton_c_api` -you must specify the Triton server install path and the model repository path -via the `--triton-server-directory` and `--model-repository` options. - -Default is `triton`. - -#### `--bls-composing-models=` - -Specifies the list of all BLS composing models as a comma separated list of -model names (with optional model version number after a colon for each) that may -be called by the input BLS model. For example, -`--bls-composing-models=modelA:3,modelB` would specify that modelA and modelB -are composing models that may be called by the input BLS model, and that modelA -will use version 3, while modelB's version is unspecified. - -#### `--model-signature-name=` - -Specifies the signature name of the saved model to use. - -Default is `serving_default`. This option will be ignored if `--service-kind` -is not `tfserving`. - -#### `-v` - -Enables verbose mode. May be specified an additional time (`-v -v`) to enable -extra verbose mode. - -## Measurement Options - -#### `--measurement-mode=[time_windows|count_windows]` - -Specifies the mode used for stabilizing measurements. 'time_windows' will -create windows such that the duration of each window is equal to -`--measurement-interval`. 'count_windows' will create windows such that there -are at least `--measurement-request-count` requests in each window and that -the window is at least one second in duration (adding more requests if -necessary). - -Default is `time_windows`. - -#### `-p ` -#### `--measurement-interval=` - -Specifies the time interval used for each measurement in milliseconds when -`--measurement-mode=time_windows` is used. 
Perf Analyzer will sample a time -interval specified by this option and take measurement over the requests -completed within that time interval. - -Default is `5000`. - -#### `--measurement-request-count=` - -Specifies the minimum number of requests to be collected in each measurement -window when `--measurement-mode=count_windows` is used. - -Default is `50`. - -#### `-s ` -#### `--stability-percentage=` - -Specifies the allowed variation in latency measurements when determining if a -result is stable. The measurement is considered stable if the ratio of max / -min from the recent 3 measurements is within (stability percentage)% in terms -of both inferences per second and latency. - -Default is `10`(%). - -#### `--percentile=` - -Specifies the confidence value as a percentile that will be used to determine -if a measurement is stable. For example, a value of `85` indicates that the -85th percentile latency will be used to determine stability. The percentile -will also be reported in the results. - -Default is `-1` indicating that the average latency is used to determine -stability. - -#### `--request-count=` - -Specifies a total number of requests to use for measurement. - -Default is `0`, which means that there is no request count and the measurement -will proceed using windows until stabilization is detected. - -#### `-r ` -#### `--max-trials=` - -Specifies the maximum number of measurements when attempting to reach stability -of inferences per second and latency for each concurrency or request rate -during the search. Perf Analyzer will terminate if the measurement is still -unstable after the maximum number of trials. - -Default is `10`. - -#### `--concurrency-range=` - -Specifies the range of concurrency levels covered by Perf Analyzer. Perf -Analyzer will start from the concurrency level of 'start' and go until 'end' -with a stride of 'step'. - -Default of 'start', 'end', and 'step' are `1`. If 'end' is not specified then -Perf Analyzer will run for a single concurrency level determined by 'start'. If -'end' is set as `0`, then the concurrency limit will be incremented by 'step' -until the latency threshold is met. 'end' and `--latency-threshold` cannot -both be `0`. 'end' cannot be `0` for sequence models while using asynchronous -mode. - -#### `--periodic-concurrency-range=` - -Specifies the range of concurrency levels in the similar but slightly different -manner as the `--concurrency-range`. Perf Analyzer will start from the -concurrency level of 'start' and increase by 'step' each time. Unlike -`--concurrency-range`, the 'end' indicates the *total* number of concurrency -since the 'start' (including) and will stop increasing once the cumulative -number of concurrent requests has reached the 'end'. The user can specify -*when* to periodically increase the concurrency level using the -`--request-period` option. The concurrency level will periodically increase for -every `n`-th response specified by `--request-period`. Since this disables -stability check in Perf Analyzer and reports response timestamps only, the user -must provide `--profile-export-file` to specify where to dump all the measured -timestamps. - -The default values of 'start', 'end', and 'step' are `1`. - -#### `--request-period=` - -Specifies the number of responses that each request must receive before new, -concurrent requests are sent when `--periodic-concurrency-range` is specified. - -Default value is `10`. 
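As a quick illustration of how the measurement and concurrency options above combine, the following sketch (the model name `my_model` is a placeholder) sweeps concurrency while stabilizing on count-based windows:

```bash
# Illustrative sketch only; "my_model" is a placeholder model name.
# Sweep concurrency 1, 3, 5, 7 and stabilize each step on windows of at
# least 200 completed requests instead of fixed time windows.
perf_analyzer -m my_model \
  --concurrency-range=1:8:2 \
  --measurement-mode=count_windows \
  --measurement-request-count=200 \
  --max-trials=10
```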
- -#### `--request-parameter=<name:value:type>` - -Specifies a custom parameter that can be sent to a Triton backend as part of -the request. For example, providing '--request-parameter max_tokens:256:int' -to the command line will set an additional parameter 'max_tokens' of type -'int' to 256 as part of the request. The --request-parameter may be specified -multiple times for different custom parameters. - -Valid `type` values are: `bool`, `int`, and `string`. - -> **NOTE** -> -> The `--request-parameter` is currently only supported by gRPC protocol. - -#### `--request-rate-range=<start:end:step>` - -Specifies the range of request rates for load generated by Perf Analyzer. This -option can take floating-point values. The search along the request rate range -is enabled only when using this option. - -If not specified, then Perf Analyzer will search along the concurrency range. -Perf Analyzer will start from the request rate of 'start' and go until 'end' -with a stride of 'step'. Default values of 'start', 'end' and 'step' are all -`1.0`. If 'end' is not specified, then Perf Analyzer will run for a single -request rate as determined by 'start'. If 'end' is set as `0.0`, then the -request rate will be incremented by 'step' until the latency threshold is met. -'end' and `--latency-threshold` cannot both be `0`. - -#### `--request-distribution=[constant|poisson]` - -Specifies the time interval distribution between dispatching inference requests -to the server. Poisson distribution closely mimics the real-world work load on -a server. This option is ignored if not using `--request-rate-range`. - -Default is `constant`. - -#### `-l <n>` -#### `--latency-threshold=<n>` - -Specifies the limit on the observed latency, in milliseconds. Perf Analyzer -will terminate the concurrency or request rate search once the measured latency -exceeds this threshold. - -Default is `0` indicating that Perf Analyzer will run for the entire -concurrency or request rate range. - -#### `--binary-search` - -Enables binary search on the specified search range (concurrency or request -rate). This option requires 'start' and 'end' to be explicitly specified in -the concurrency range or request rate range. When using this option, 'step' is -more like the precision. When the 'step' is lower, there are more iterations -along the search path to find suitable convergence. - -When `--binary-search` is not specified, linear search is used. - -#### `--request-intervals=<path>` - -Specifies a path to a file containing time intervals in microseconds. Each time -interval should be in a new line. Perf Analyzer will try to maintain time -intervals between successive generated requests to be as close as possible in -this file. This option can be used to apply custom load to the server with a -certain pattern of interest. Perf Analyzer will loop around the file if the -duration of execution exceeds the amount of time specified by the intervals. -This option cannot be used with `--request-rate-range` or -`--concurrency-range`. - -#### `--max-threads=<n>` - -Specifies the maximum number of threads that will be created for providing -desired concurrency or request rate. However, when running in synchronous mode -with `--concurrency-range` having explicit 'end' specification, this value will -be ignored. - -Default is `4` if `--request-rate-range` is specified, otherwise default is -`16`. - -## Sequence Model Options - -#### `--num-of-sequences=<n>` - -Specifies the number of concurrent sequences for sequence models. This option -is ignored when `--request-rate-range` is not specified. - -Default is `4`. - -#### `--sequence-length=<n>` - -Specifies the base length of a sequence used for sequence models. A sequence -with length X will be composed of X requests to be sent as the elements in the -sequence. The actual length of the sequence will be within +/- Y% of the base -length, where Y defaults to 20% and is customizable via -`--sequence-length-variation`. If sequence length is unspecified and input data -is provided, the sequence length will be the number of inputs in the -user-provided input data. - -Default is `20`. - -#### `--sequence-length-variation=<n>` - -Specifies the percentage variation in length of sequences. This option is only -valid when not using user-provided input data or when `--sequence-length` is -specified while using user-provided input data. - -Default is `20`(%). - -#### `--sequence-id-range=<start:end>` - -Specifies the range of sequence IDs used by Perf Analyzer. Perf Analyzer will -start from the sequence ID of 'start' and go until 'end' (excluded). If 'end' -is not specified then Perf Analyzer will generate new sequence IDs without -bounds. If 'end' is specified and the concurrency setting may result in -maintaining a number of sequences more than the range of available sequence -IDs, Perf Analyzer will exit with an error due to possible sequence ID -collisions. - -The default for 'start' is `1`, and 'end' is not specified (no bounds). - -#### `--serial-sequences` - -Enables the serial sequence mode where a maximum of one request is live per sequence. -Note: It is possible that this mode can cause the request rate mode to not achieve the -desired rate, especially if num-of-sequences is too small. - -## Input Data Options - -#### `--input-data=[zero|random|<path>]` - -Specifies type of data that will be used for input in inference requests. The -available options are `zero`, `random`, and a path to a directory or a JSON -file. - -When pointing to a JSON file, the user must adhere to the format described in -the [input data documentation](input_data.md). By specifying JSON data, users -can control data used with every request. Multiple data streams can be specified -for a sequence model, and Perf Analyzer will select a data stream in a -round-robin fashion for every new sequence. Multiple JSON files can also be -provided (`--input-data json_file1.json --input-data json_file2.json` and so on) -and Perf Analyzer will append data streams from each file. When using -`--service-kind=torchserve`, make sure this option points to a JSON file. - -If the option is a path to a directory then the directory must contain a -binary/text file for each non-string/string input respectively, named the same as the -input. Each file must contain the data required for that input for a batch-1 -request. Each binary file should contain the raw binary representation of the -input in row-major order for non-string inputs. The text file should contain -all strings needed by batch-1, each in a new line, listed in row-major order. - -Default is `random`. - -#### `-b <n>` - -Specifies the batch size for each request sent. - -Default is `1`. - -#### `--shape=<string>` - -Specifies the shape used for the specified input. The argument must be -specified as 'name:shape' where the shape is a comma-separated list for -dimension sizes. For example `--shape=input_name:1,2,3` indicates that the -input `input_name` has tensor shape [ 1, 2, 3 ]. `--shape` may be specified -multiple times to specify shapes for different inputs. - -#### `--string-data=<string>` - -Specifies the string to initialize string input buffers.
Perf Analyzer will -replicate the given string to build tensors of required shape. -`--string-length` will not have any effect. This option is ignored if -`--input-data` points to a JSON file or directory. - -#### `--string-length=` - -Specifies the length of the random strings to be generated by Perf Analyzer -for string input. This option is ignored if `--input-data` points to a -JSON file or directory. - -Default is `128`. - -#### `--shared-memory=[none|system|cuda]` - -Specifies the type of the shared memory to use for input and output data. - -Default is `none`. - -#### `--output-shared-memory-size=` - -Specifies The size, in bytes, of the shared memory region to allocate per -output tensor. Only needed when one or more of the outputs are of string type -and/or variable shape. The value should be larger than the size of the largest -output tensor that the model is expected to return. Perf Analyzer will use the -following formula to calculate the total shared memory to allocate: -output_shared_memory_size * number_of_outputs * batch_size. - -Default is `102400` (100 KB). - -#### `--input-tensor-format=[binary|json]` - -Specifies the Triton inference request input tensor format. Only valid when HTTP -protocol is used. - -Default is `binary`. - -#### `--output-tensor-format=[binary|json]` - -Specifies the Triton inference response output tensor format. Only valid when -HTTP protocol is used. - -Default is `binary`. - -## Request Options - -#### `-i [http|grpc]` - -Specifies the communication protocol to use. The available protocols are HTTP -and gRPC. - -Default is `http`. - -#### `-a` -#### `--async` - -Enables asynchronous mode in Perf Analyzer. - -By default, Perf Analyzer will use a synchronous request API for inference. -However, if the model is sequential, then the default mode is asynchronous. -Specify `--sync` to operate sequential models in synchronous mode. In -synchronous mode, Perf Analyzer will start threads equal to the concurrency -level. Use asynchronous mode to limit the number of threads, yet maintain the -concurrency. - -#### `--sync` - -Enables synchronous mode in Perf Analyzer. Can be used to operate Perf -Analyzer with sequential model in synchronous mode. - -#### `--streaming` - -Enables the use of streaming API. This option is only valid with gRPC protocol. - -#### `-H ` - -Specifies the header that will be added to HTTP requests (ignored for gRPC -requests). The header must be specified as 'Header:Value'. `-H` may be -specified multiple times to add multiple headers. - -#### `--grpc-compression-algorithm=[none|gzip|deflate]` - -Specifies the compression algorithm to be used by gRPC when sending requests. -Only supported when gRPC protocol is being used. - -Default is `none`. - -## Server Options - -#### `-u ` - -Specifies the URL for the server. - -Default is `localhost:8000` when using `--service-kind=triton` with HTTP. -Default is `localhost:8001` when using `--service-kind=triton` with gRPC. -Default is `localhost:8500` when using `--service-kind=tfserving`. - -#### `--ssl-grpc-use-ssl` - -Enables usage of an encrypted channel to the server. - -#### `--ssl-grpc-root-certifications-file=` - -Specifies the path to file containing the PEM encoding of the server root -certificates. - -#### `--ssl-grpc-private-key-file=` - -Specifies the path to file containing the PEM encoding of the client's private -key. - -#### `--ssl-grpc-certificate-chain-file=` - -Specifies the path to file containing the PEM encoding of the client's -certificate chain. 
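The request and server options above can be combined in a single invocation. The sketch below (the endpoint and certificate paths are placeholders) drives a gRPC endpoint over an encrypted channel with gzip-compressed requests:

```bash
# Illustrative sketch only; the URL and certificate paths are placeholders.
perf_analyzer -m my_model -i grpc --async \
  -u myhost:8001 \
  --grpc-compression-algorithm=gzip \
  --ssl-grpc-use-ssl \
  --ssl-grpc-root-certifications-file=/path/to/ca.pem \
  --ssl-grpc-private-key-file=/path/to/client.key \
  --ssl-grpc-certificate-chain-file=/path/to/client.pem
```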
- -#### `--ssl-https-verify-peer=[0|1]` - -Specifies whether to verify the peer's SSL certificate. See -https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html for the meaning of each -value. - -Default is `1`. - -#### `--ssl-https-verify-host=[0|1|2]` - -Specifies whether to verify the certificate's name against host. See -https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html for the meaning of each -value. - -Default is `2`. - -#### `--ssl-https-ca-certificates-file=<path>` - -Specifies the path to Certificate Authority (CA) bundle. - -#### `--ssl-https-client-certificate-file=<path>` - -Specifies the path to the SSL client certificate. - -#### `--ssl-https-client-certificate-type=[PEM|DER]` - -Specifies the type of the client SSL certificate. - -Default is `PEM`. - -#### `--ssl-https-private-key-file=<path>` - -Specifies the path to the private key file for TLS and SSL client cert. - -#### `--ssl-https-private-key-type=[PEM|DER]` - -Specifies the type of the private key file. - -Default is `PEM`. - -#### `--triton-server-directory=<path>` - -Specifies the Triton server install path. Required by and only used when C API -is used (`--service-kind=triton_c_api`). - -Default is `/opt/tritonserver`. - -#### `--model-repository=<path>` - -Specifies the model repository directory path for loading models. Required by -and only used when C API is used (`--service-kind=triton_c_api`). - -## Prometheus Metrics Options - -#### `--collect-metrics` - -Enables the collection of server-side inference server metrics. Perf Analyzer -will output metrics in the CSV file generated with the `-f` option. Only valid -when the `--verbose-csv` option is also used. - -#### `--metrics-url=<url>` - -Specifies the URL to query for server-side inference server metrics. - -Default is `localhost:8002/metrics`. - -#### `--metrics-interval=<n>` - -Specifies how often within each measurement window, in milliseconds, Perf -Analyzer should query for server-side inference server metrics. - -Default is `1000`. - -## Report Options - -#### `-f <path>` - -Specifies the path that the latency report file will be generated at. - -When `-f` is not specified, a latency report will not be generated. - -#### `--profile-export-file <path>` - -Specifies the path that the profile export will be generated at. - -When `--profile-export-file` is not specified, a profile export will not be -generated. - -#### `--verbose-csv` - -Enables additional information being output to the CSV file generated by Perf -Analyzer. - -## Trace Options - -#### `--trace-level=[OFF|TIMESTAMPS|TENSORS]` - -Specifies a trace level. `OFF` disables tracing. `TIMESTAMPS` traces -timestamps. `TENSORS` traces tensors. It may be specified multiple times to -trace multiple types of information. Only used for `--service-kind=triton`. - -Default is `OFF`. - -#### `--trace-rate=<n>` - -Specifies the trace sampling rate (traces per second). - -Default is `1000`. - -#### `--trace-count=<n>` - -Specifies the number of traces to be sampled. If the value is `-1`, the number -of traces to be sampled will not be limited. - -Default is `-1`. - -#### `--log-frequency=<n>` - -Specifies the trace log frequency. If the value is `0`, Triton will only log -the trace output to the trace file when shutting down. -Otherwise, Triton will log the trace output to `<trace file>.<idx>` when it -collects the specified number of traces.
-For example, if the trace file is `trace_file.log`, and if the log -frequency is `100`, when Triton collects the 100th trace, it logs the traces -to file `trace_file.log.0`, and when it collects the 200th trace, it logs the -101st to the 200th traces to file `trace_file.log.1`. - -Default is `0`. - -## Deprecated Options - -#### `--data-directory=` - -**DEPRECATED** - -Alias for `--input-data=` where `` is the path to a directory. See -`--input-data` option documentation for details. - -#### `-c ` - -**DEPRECATED** - -Specifies the maximum concurrency that Perf Analyzer will search up to. Cannot -be used with `--concurrency-range`. - -#### `-d` - -**DEPRECATED** - -Enables dynamic concurrency mode. Perf Analyzer will search along -concurrencies up to the maximum concurrency specified via `-c `. Cannot be -used with `--concurrency-range`. - -#### `-t ` - -**DEPRECATED** - -Specifies the number of concurrent requests. Cannot be used with -`--concurrency-range`. - -Default is `1`. - -#### `-z` - -**DEPRECATED** - -Alias for `--input-data=zero`. See `--input-data` option documentation for -details. diff --git a/src/c++/perf_analyzer/docs/inference_load_modes.md b/src/c++/perf_analyzer/docs/inference_load_modes.md deleted file mode 100644 index 83fa83eb0..000000000 --- a/src/c++/perf_analyzer/docs/inference_load_modes.md +++ /dev/null @@ -1,100 +0,0 @@ - - -# Inference Load Modes - -Perf Analyzer has several modes for generating inference request load for a -model. - -## Concurrency Mode - -In concurrency mode, Perf Analyzer attempts to send inference requests to the -server such that N requests are always outstanding during profiling. For -example, when using -[`--concurrency-range=4`](cli.md#--concurrency-rangestartendstep), Perf Analyzer -will attempt to have 4 outgoing inference requests at all times during -profiling. - -## Periodic Concurrency Mode - -In periodic concurrency mode, Perf Analyzer will periodically launch a new set -of inference requests until the total number of inference requests that have been -launched since the beginning reaches N requests. - -For example, when using `--periodic-concurrency-range 10:100:30`, Perf Analyzer -will start with 10 concurrent requests and for every step, it will launch 30 new -inference requests until the total number of requests launched since the -beginning reaches 100. Additionally, the user can also specify *when* to launch -the new requests by specifying `--request-period M`. This will set Perf Analyzer -to launch a new set of requests whenever *all* of the latest set of launched -concurrent requests have received M responses back from the server. - -The user can also specify custom parameters to the model using the -`--request-parameter ` option. -For instance, passing `--request-parameter max_tokens:256:int` will set an -additional parameter `max_tokens` of type `int` to 256 as part of the request. - -```bash -perf_analyzer -m -i grpc --async --streaming \ - --profile-export-file profile.json \ - --periodic-concurrency-range 10:100:30 \ - --request-period 10 \ - --request-parameter max_tokens:256:int -``` - -> **Note** -> -> The periodic concurrency mode is currently supported only by the gRPC protocol and -> with [decoupled models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md). -> Additionally, the user must also specify a file where Perf Analyzer could dump all the -> profiled data using `--profile-export-file`.
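For comparison with the periodic concurrency example above, a minimal sketch of the plain concurrency mode described at the start of this section follows (the model name is a placeholder, and the `start:end:step` sweep form is assumed from the CLI documentation):

```bash
# Hypothetical example: sweep concurrency, holding 2, 4, 6, and 8
# requests outstanding in turn during profiling.
perf_analyzer -m my_model --concurrency-range 2:8:2
```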
- -## Request Rate Mode - -In request rate mode, Perf Analyzer attempts to send N inference requests per -second to the server during profiling. For example, when using -[`--request-rate-range=20`](cli.md#--request-rate-rangestartendstep), Perf -Analyzer will attempt to send 20 requests per second during profiling. - -## Custom Interval Mode - -In custom interval mode, Perf Analyzer attempts to send inference requests -according to intervals (between requests, looping if necessary) provided by the -user in the form of a text file with one time interval (in microseconds) per -line. For example, when using -[`--request-intervals=my_intervals.txt`](cli.md#--request-intervalspath), -where `my_intervals.txt` contains: - -``` -100000 -200000 -500000 -``` - -Perf Analyzer will attempt to send requests at the following times: 0.1s, 0.3s, -0.8s, 0.9s, 1.1s, 1.6s, and so on, during profiling. diff --git a/src/c++/perf_analyzer/docs/input_data.md b/src/c++/perf_analyzer/docs/input_data.md deleted file mode 100644 index af2328fcd..000000000 --- a/src/c++/perf_analyzer/docs/input_data.md +++ /dev/null @@ -1,306 +0,0 @@ - - -# Input Data - -Use the [`--help`](cli.md#--help) option to see complete documentation for all -input data options. By default Perf Analyzer sends random data to all the inputs -of your model. You can select a different input data mode with the -[`--input-data`](cli.md#--input-datazerorandompath) option: - -- _random_: (default) Send random data for each input. Note: Perf Analyzer only - generates random data once per input and reuses that for all inferences -- _zero_: Send zeros for each input. -- directory path: A path to a directory containing a binary file for each input, - named the same as the input (and optionally a binary file for each output for - validation, named the same as the output). Each binary file must contain the - data required for that input/output for a batch-1 request. Each file should - contain the raw binary representation of the input/output in row-major order. -- file path: A path to a JSON file containing data to be used with every - inference request. See the "Real Input Data" section for further details. - [`--input-data`](cli.md#--input-datazerorandompath) can be provided multiple - times with different file paths to specify multiple JSON files. - -For tensors with `STRING`/`BYTES` datatype, the -[`--string-length`](cli.md#--string-lengthn) and -[`--string-data`](cli.md#--string-datastring) options may be used in some cases -(see [`--help`](cli.md#--help) for full documentation). - -For models that support batching you can use the [`-b`](cli.md#-b-n) option to -indicate the batch size of the requests that Perf Analyzer should send. For -models with variable-sized inputs you must provide the -[`--shape`](cli.md#--shapestring) argument so that Perf Analyzer knows what -shape tensors to use. For example, for a model that has an input called -`IMAGE` that has shape `[3, N, M]`, where `N` and `M` are variable-size -dimensions, to tell Perf Analyzer to send batch size 4 requests of shape -`[3, 224, 224]`: - -``` -$ perf_analyzer -m mymodel -b 4 --shape IMAGE:3,224,224 -``` - -## Real Input Data - -The performance of some models is highly dependent on the data used. For such -cases you can provide data to be used with every inference request made by Perf -Analyzer in a JSON file. Perf Analyzer will use the provided data in a -round-robin order when sending inference requests.
For sequence models, if a -sequence length is specified via -[`--sequence-length`](cli.md#--sequence-lengthn), Perf Analyzer will also loop -through the provided data in a round-robin order up to the specified sequence -length (with a percentage variation customizable via -[`--sequence-length-variation`](cli.md#--sequence-length-variationn)). -Otherwise, the sequence length will be the number of inputs specified in -user-provided input data. - -Each entry in the `"data"` array must specify all input tensors with the exact -size expected by the model for a single batch. The following example describes -data for a model with inputs named, `INPUT0` and `INPUT1`, shape `[4, 4]` and -data type `INT32`: - -```json -{ - "data": - [ - { - "INPUT0": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "INPUT1": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - }, - { - "INPUT0": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "INPUT1": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - }, - { - "INPUT0": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "INPUT1": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - }, - { - "INPUT0": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "INPUT1": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - } - ] -} -``` - -Note that the `[4, 4]` tensor has been flattened in a row-major format for the -inputs. In addition to specifying explicit tensors, you can also provide Base64 -encoded binary data for the tensors. Each data object must list its data in a -row-major order. Binary data must be in little-endian byte order. The following -example highlights how this can be achieved: - -```json -{ - "data": - [ - { - "INPUT0": {"b64": "YmFzZTY0IGRlY29kZXI="}, - "INPUT1": {"b64": "YmFzZTY0IGRlY29kZXI="} - }, - { - "INPUT0": {"b64": "YmFzZTY0IGRlY29kZXI="}, - "INPUT1": {"b64": "YmFzZTY0IGRlY29kZXI="} - }, - { - "INPUT0": {"b64": "YmFzZTY0IGRlY29kZXI="}, - "INPUT1": {"b64": "YmFzZTY0IGRlY29kZXI="} - } - ] -} -``` - -In case of sequence models, multiple data streams can be specified in the JSON -file. Each sequence will get a data stream of its own and Perf Analyzer will -ensure the data from each stream is played back to the same correlation ID. The -below example highlights how to specify data for multiple streams for a sequence -model with a single input named `INPUT`, shape `[1]` and data type `STRING`: - -```json -{ - "data": - [ - [ - { - "INPUT": ["1"] - }, - { - "INPUT": ["2"] - }, - { - "INPUT": ["3"] - }, - { - "INPUT": ["4"] - } - ], - [ - { - "INPUT": ["1"] - }, - { - "INPUT": ["1"] - }, - { - "INPUT": ["1"] - } - ], - [ - { - "INPUT": ["1"] - }, - { - "INPUT": ["1"] - } - ] - ] -} -``` - -The above example describes three data streams with lengths 4, 3 and 2 -respectively. Perf Analyzer will hence produce sequences of length 4, 3 and 2 in -this case. - -You can also provide an optional `"shape"` field to the tensors. This is -especially useful while profiling the models with variable-sized tensors as -input. Additionally note that when providing the `"shape"` field, tensor -contents must be provided separately in a "content" field in row-major order. -The specified shape values will override default input shapes provided as a -command line option (see [`--shape`](cli.md#--shapestring)) for variable-sized -inputs. In the absence of a `"shape"` field, the provided defaults will be used. -There is no need to specify shape as a command line option if all the input data -provide shape values for variable tensors. 
Below is an example JSON file for a -model with a single input `INPUT`, shape `[-1, -1]` and data type `INT32`: - -```json -{ - "data": - [ - { - "INPUT": - { - "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "shape": [2,8] - } - }, - { - "INPUT": - { - "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "shape": [8,2] - } - }, - { - "INPUT": - { - "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - } - }, - { - "INPUT": - { - "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "shape": [4,4] - } - } - ] -} -``` - -The following example provides the contents as a base64 string with explicit -shapes: - -```json -{ - "data": - [ - { - "INPUT": - { - "content": {"b64": "/9j/4AAQSkZ(...)"}, - "shape": [7964] - } - }, - { - "INPUT": - { - "content": {"b64": "/9j/4AAQSkZ(...)"}, - "shape": [7964] - } - } - ] -} -``` - -Note that for `STRING` type, an element is represented by a 4-byte unsigned -integer giving the length followed by the actual bytes. The byte array to be -encoded using base64 must include the 4-byte unsigned integers. - -### Output Validation - -When real input data is provided, you can optionally request that Perf Analyzer -validate the inference output for the input data. - -Validation output can be specified in the `"validation_data"` field, which has the -same format as the `"data"` field for real input. Note that the entries in -`"validation_data"` must align with `"data"` for proper mapping. The following -example describes validation data for a model with inputs named `INPUT0` and -`INPUT1`, outputs named `OUTPUT0` and `OUTPUT1`, where all tensors have shape `[4, 4]` -and data type `INT32`: - -```json -{ - "data": - [ - { - "INPUT0": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "INPUT1": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - } - ], - "validation_data": - [ - { - "OUTPUT0": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "OUTPUT1": [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] - } - ] -} -``` - -Besides the above example, the validation outputs can be specified in the same -variations described in the real input data section. - -# Shared Memory - -By default Perf Analyzer sends input tensor data and receives output tensor data -over the network. You can instead instruct Perf Analyzer to use system shared -memory or CUDA shared memory to communicate tensor data. By using these options -you can model the performance that you can achieve by using shared memory in -your application. Use -[`--shared-memory=system`](cli.md#--shared-memorynonesystemcuda) to use system -(CPU) shared memory or -[`--shared-memory=cuda`](cli.md#--shared-memorynonesystemcuda) to use CUDA -shared memory. diff --git a/src/c++/perf_analyzer/docs/install.md b/src/c++/perf_analyzer/docs/install.md deleted file mode 100644 index 5390dc00a..000000000 --- a/src/c++/perf_analyzer/docs/install.md +++ /dev/null @@ -1,106 +0,0 @@ - - -# Recommended Installation Method - -## Triton SDK Container - -The recommended way to "install" Perf Analyzer is to run the pre-built -executable from within the Triton SDK docker container available on the -[NVIDIA GPU Cloud Catalog](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver). -As long as the SDK container has its network exposed to the address and port of -the inference server, Perf Analyzer will be able to run. - -```bash -export RELEASE= # e.g.
to use the release from the end of February of 2023, do `export RELEASE=23.02` - -docker pull nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -docker run --gpus all --rm -it --net host nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# inside container -perf_analyzer -m -``` - -# Alternative Installation Methods - -- [Pip](#pip) -- [Build from Source](#build-from-source) - -## Pip - -```bash -pip install tritonclient - -perf_analyzer -m -``` - -**Warning**: If any runtime dependencies are missing, Perf Analyzer will produce -errors showing which ones are missing. You will need to manually install them. - -## Build from Source - -The Triton SDK container is used for building, so some build and runtime -dependencies are already installed. - -```bash -export RELEASE= # e.g. to use the release from the end of February of 2023, do `export RELEASE=23.02` - -docker pull nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -docker run --gpus all --rm -it --net host nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# inside container -# prep installing newer version of cmake -apt update && apt install -y gpg wget && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && . /etc/os-release && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null - -# install build/runtime dependencies -apt update && apt install -y cmake-data=3.27.7* cmake=3.27.7* libcurl4-openssl-dev rapidjson-dev - -rm -rf client ; git clone --depth 1 https://github.com/triton-inference-server/client - -mkdir client/build ; cd client/build - -cmake -DTRITON_ENABLE_PERF_ANALYZER=ON .. - -make -j8 cc-clients - -cc-clients/perf_analyzer/perf_analyzer -m -``` - -- To enable - [CUDA shared memory](input_data.md#shared-memory), add - `-DTRITON_ENABLE_GPU=ON` to the `cmake` command. -- To enable - [C API mode](benchmarking.md#benchmarking-triton-directly-via-c-api), add - `-DTRITON_ENABLE_PERF_ANALYZER_C_API=ON` to the `cmake` command. -- To enable [TorchServe backend](benchmarking.md#benchmarking-torchserve), add - `-DTRITON_ENABLE_PERF_ANALYZER_TS=ON` to the `cmake` command. -- To enable - [Tensorflow Serving backend](benchmarking.md#benchmarking-tensorflow-serving), - add `-DTRITON_ENABLE_PERF_ANALYZER_TFS=ON` to the `cmake` command. diff --git a/src/c++/perf_analyzer/docs/measurements_metrics.md b/src/c++/perf_analyzer/docs/measurements_metrics.md deleted file mode 100644 index 3f5b64348..000000000 --- a/src/c++/perf_analyzer/docs/measurements_metrics.md +++ /dev/null @@ -1,225 +0,0 @@ - - -# Measurement Modes - -Currently, Perf Analyzer has 2 measurement modes. - -## Time Windows - -When using time windows measurement mode -([`--measurement-mode=time_windows`](cli.md#--measurement-modetime_windowscount_windows)), -Perf Analyzer will count how many requests have completed during a window of -duration `X` (in milliseconds, via -[`--measurement-interval=X`](cli.md#--measurement-intervaln), default is -`5000`). This is the default measurement mode. 
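As a small, hedged illustration of the time windows mode (the model name below is a placeholder, not a model from this documentation), the window duration can be lengthened for models whose throughput takes longer to stabilize:

```bash
# Hypothetical example: stabilize over 10-second windows instead of the
# default 5-second windows.
perf_analyzer -m my_model --measurement-mode=time_windows --measurement-interval=10000
```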
- -## Count Windows - -When using count windows measurement mode -([`--measurement-mode=count_windows`](cli.md#--measurement-modetime_windowscount_windows)), -Perf Analyzer will start the window duration at 1 second and potentially -dynamically increase it until `X` requests have completed (via -[`--measurement-request-count=X`](cli.md#--measurement-request-countn), default -is `50`). - -# Metrics - -## How Throughput is Calculated - -Perf Analyzer calculates throughput to be the total number of requests completed -during a measurement, divided by the duration of the measurement, in seconds. - -## How Latency is Calculated - -For each request concurrency level, Perf Analyzer reports latency and throughput -as seen from Perf Analyzer and also the average request latency on the server. - -The server latency measures the total time from when the request is received at -the server until when the response is sent from the server. Because of the HTTP -and gRPC libraries used to implement the server endpoints, total server latency -is typically more accurate for HTTP requests as it measures time from the first -byte received until the last byte sent. For both HTTP and gRPC the total server -latency is broken down into the following components: - -- _queue_: The average time spent in the inference schedule queue by a request - waiting for an instance of the model to become available. -- _compute_: The average time spent performing the actual inference, including - any time needed to copy data to/from the GPU. -- _overhead_: The average time spent in the endpoint that cannot be correctly - captured in the send/receive time with the way the gRPC and HTTP libraries are - structured. - -The client latency time is broken down further for HTTP and gRPC as follows: - -- HTTP: _send/recv_ indicates the time on the client spent sending the request - and receiving the response. _response wait_ indicates time waiting for the - response from the server. -- gRPC: _(un)marshal request/response_ indicates the time spent marshalling the - request data into the gRPC protobuf and unmarshalling the response data from - the gRPC protobuf. _response wait_ indicates time writing the gRPC request to - the network, waiting for the response, and reading the gRPC response from the - network. - -Use the verbose ([`-v`](cli.md#-v)) option to see more output, including the -stabilization passes run for each request concurrency level or request rate. - -# Reports - -## Visualizing Latency vs. Throughput - -Perf Analyzer provides the [`-f`](cli.md#-f-path) option to generate a file -containing CSV output of the results. - -``` -$ perf_analyzer -m inception_graphdef --concurrency-range 1:4 -f perf.csv -... -$ cat perf.csv -Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,Client Recv,p50 latency,p90 latency,p95 latency,p99 latency -1,69.2,225,2148,64,206,11781,19,0,13891,18795,19753,21018 -3,84.2,237,1768,21673,209,11742,17,0,35398,43984,47085,51701 -4,84.2,279,1604,33669,233,11731,18,1,47045,56545,59225,64886 -2,87.2,235,1973,9151,190,11346,17,0,21874,28557,29768,34766 -``` - -NOTE: The rows in the CSV file are sorted in an increasing order of throughput -(Inferences/Second). - -You can import the CSV file into a spreadsheet to help visualize the latency vs -inferences/second tradeoff as well as see some components of the latency.
Follow -these steps: - -- Open - [this spreadsheet](https://docs.google.com/spreadsheets/d/1S8h0bWBBElHUoLd2SOvQPzZzRiQ55xjyqodm_9ireiw) -- Make a copy from the File menu "Make a copy..." -- Open the copy -- Select the A1 cell on the "Raw Data" tab -- From the File menu select "Import..." -- Select "Upload" and upload the file -- Select "Replace data at selected cell" and then select the "Import data" - button - -## Server-side Prometheus metrics - -Perf Analyzer can collect -[server-side metrics](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md#gpu-metrics), -such as GPU utilization and GPU power usage. To enable the collection of these -metrics, use the [`--collect-metrics`](cli.md#--collect-metrics) option. - -By default, Perf Analyzer queries the metrics endpoint at the URL -`localhost:8002/metrics`. If the metrics are accessible at a different URL, use -the [`--metrics-url=`](cli.md#--metrics-urlurl) option to specify that. - -By default, Perf Analyzer queries the metrics endpoint every 1000 milliseconds. -To use a different querying interval, use the -[`--metrics-interval=`](cli.md#--metrics-intervaln) option (specify in -milliseconds). - -Because Perf Analyzer can collect the server-side metrics multiple times per -run, these metrics are aggregated in specific ways to produce one final number -per searched concurrency or request rate. Here is how the metrics are -aggregated: - -| Metric | Aggregation | -| - | - | -| GPU Utilization | Averaged from each collection taken during stable passes. We want a number representative of all stable passes. | -| GPU Power Usage | Averaged from each collection taken during stable passes. We want a number representative of all stable passes. | -| GPU Used Memory | Maximum from all collections taken during a stable pass. Users are typically curious what the peak memory usage is for determining model/hardware viability. | -| GPU Total Memory | First from any collection taken during a stable pass. All of the collections should produce the same value for total memory available on the GPU. | - -Note that all metrics are per-GPU in the case of multi-GPU systems. - -To output these server-side metrics to a CSV file, use the -[`-f `](cli.md#-f-path) and [`--verbose-csv`](cli.md#--verbose-csv) -options. The output CSV will contain one column per metric. The value of each -column will be a `key:value` pair (`GPU UUID:metric value`). Each `key:value` -pair will be delimited by a semicolon (`;`) to indicate metric values for each -GPU accessible by the server. There is a trailing semicolon. See below: - -`GPU UUID:metric value;GPU UUID:metric value;...;` - -Here is a simplified CSV output: - -``` -$ perf_analyzer -m resnet50_libtorch --collect-metrics -f output.csv --verbose-csv -$ cat output.csv -Concurrency,...,Avg GPU Utilization,Avg GPU Power Usage,Max GPU Memory Usage,Total GPU Memory -1,...,gpu_uuid_0:0.33;gpu_uuid_1:0.5;,gpu_uuid_0:55.3;gpu_uuid_1:56.9;,gpu_uuid_0:10000;gpu_uuid_1:11000;,gpu_uuid_0:50000;gpu_uuid_1:75000;, -2,...,gpu_uuid_0:0.25;gpu_uuid_1:0.6;,gpu_uuid_0:25.6;gpu_uuid_1:77.2;,gpu_uuid_0:11000;gpu_uuid_1:17000;,gpu_uuid_0:50000;gpu_uuid_1:75000;, -3,...,gpu_uuid_0:0.87;gpu_uuid_1:0.9;,gpu_uuid_0:87.1;gpu_uuid_1:71.7;,gpu_uuid_0:15000;gpu_uuid_1:22000;,gpu_uuid_0:50000;gpu_uuid_1:75000;, -``` - -## Communication Protocol - -By default, Perf Analyzer uses HTTP to communicate with Triton. The gRPC -protocol can be specified with the [`-i [http|grpc]`](cli.md#-i-httpgrpc) -option.
If gRPC is selected, the [`--streaming`](cli.md#--streaming) option can -also be specified for gRPC streaming. - -### SSL/TLS Support - -Perf Analyzer can be used to benchmark a Triton service behind SSL/TLS-enabled -endpoints. These options can help in establishing a secure connection with the -endpoint and profiling the server. - -For gRPC, see the following options: - -- [`--ssl-grpc-use-ssl`](cli.md#--ssl-grpc-use-ssl) -- [`--ssl-grpc-root-certifications-file=`](cli.md#--ssl-grpc-root-certifications-filepath) -- [`--ssl-grpc-private-key-file=`](cli.md#--ssl-grpc-private-key-filepath) -- [`--ssl-grpc-certificate-chain-file=`](cli.md#--ssl-grpc-certificate-chain-filepath) - -More details here: -https://grpc.github.io/grpc/cpp/structgrpc_1_1_ssl_credentials_options.html - -The -[inference protocol gRPC SSL/TLS section](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#ssltls) -describes server-side options to configure SSL/TLS in Triton's gRPC endpoint. - -For HTTPS, the following options are exposed: - -- [`--ssl-https-verify-peer`](cli.md#--ssl-https-verify-peer01) -- [`--ssl-https-verify-host`](cli.md#--ssl-https-verify-host012) -- [`--ssl-https-ca-certificates-file`](cli.md#--ssl-https-ca-certificates-filepath) -- [`--ssl-https-client-certificate-file`](cli.md#--ssl-https-client-certificate-filepath) -- [`--ssl-https-client-certificate-type`](cli.md#--ssl-https-client-certificate-typepemder) -- [`--ssl-https-private-key-file`](cli.md#--ssl-https-private-key-filepath) -- [`--ssl-https-private-key-type`](cli.md#--ssl-https-private-key-typepemder) - -See [`--help`](cli.md#--help) for full documentation. - -Unlike gRPC, Triton's HTTP server endpoint cannot be configured with SSL/TLS -support. - -Note: Just providing these `--ssl-https-*` options to Perf Analyzer does not -ensure that SSL/TLS is used in communication. If SSL/TLS is not enabled on the -service endpoint, these options have no effect. The intent of exposing these -options to a user of Perf Analyzer is to allow them to configure Perf Analyzer -to benchmark a Triton service behind SSL/TLS-enabled endpoints. In other words, -if Triton is running behind an HTTPS server proxy, then these options would allow -Perf Analyzer to profile Triton via the exposed HTTPS proxy. diff --git a/src/c++/perf_analyzer/docs/quick_start.md b/src/c++/perf_analyzer/docs/quick_start.md deleted file mode 100644 index 17d63f560..000000000 --- a/src/c++/perf_analyzer/docs/quick_start.md +++ /dev/null @@ -1,114 +0,0 @@ - - -# Quick Start - -The steps below will guide you on how to start using Perf Analyzer. - -### Step 1: Start Triton Container - -```bash -export RELEASE= # e.g.
to use the release from the end of February of 2023, do `export RELEASE=23.02` - -docker pull nvcr.io/nvidia/tritonserver:${RELEASE}-py3 - -docker run --gpus all --rm -it --net host nvcr.io/nvidia/tritonserver:${RELEASE}-py3 -``` - -### Step 2: Download `simple` Model - -```bash -# inside triton container -git clone --depth 1 https://github.com/triton-inference-server/server - -mkdir model_repository ; cp -r server/docs/examples/model_repository/simple model_repository -``` - -### Step 3: Start Triton Server - -```bash -# inside triton container -tritonserver --model-repository $(pwd)/model_repository &> server.log & - -# confirm server is ready, look for 'HTTP/1.1 200 OK' -curl -v localhost:8000/v2/health/ready - -# detach (CTRL-p CTRL-q) -``` - -### Step 4: Start Triton SDK Container - -```bash -docker pull nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -docker run --gpus all --rm -it --net host nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk -``` - -### Step 5: Run Perf Analyzer - -```bash -# inside sdk container -perf_analyzer -m simple -``` - -### Step 6: Observe and Analyze Output - -``` -$ perf_analyzer -m simple -*** Measurement Settings *** - Batch size: 1 - Service Kind: Triton - Using "time_windows" mode for stabilization - Measurement window: 5000 msec - Using synchronous calls for inference - Stabilizing using average latency - -Request concurrency: 1 - Client: - Request count: 25348 - Throughput: 1407.84 infer/sec - Avg latency: 708 usec (standard deviation 663 usec) - p50 latency: 690 usec - p90 latency: 881 usec - p95 latency: 926 usec - p99 latency: 1031 usec - Avg HTTP time: 700 usec (send/recv 102 usec + response wait 598 usec) - Server: - Inference count: 25348 - Execution count: 25348 - Successful request count: 25348 - Avg request latency: 382 usec (overhead 41 usec + queue 41 usec + compute input 26 usec + compute infer 257 usec + compute output 16 usec) - -Inferences/Second vs. Client Average Batch Latency -Concurrency: 1, throughput: 1407.84 infer/sec, latency 708 usec -``` - -We can see from the output that the model was able to complete approximately -1407.84 inferences per second, with an average latency of 708 microseconds per -inference request. Concurrency of 1 meant that Perf Analyzer attempted to always -have 1 outgoing request at all times. diff --git a/src/c++/perf_analyzer/doctest.h b/src/c++/perf_analyzer/doctest.h deleted file mode 100644 index adda4134c..000000000 --- a/src/c++/perf_analyzer/doctest.h +++ /dev/null @@ -1,7824 +0,0 @@ -// ====================================================================== lgtm -// [cpp/missing-header-guard] -// == DO NOT MODIFY THIS FILE BY HAND - IT IS AUTO GENERATED BY CMAKE! 
== -// ====================================================================== -// -// doctest.h - the lightest feature-rich C++ single-header testing framework for -// unit tests and TDD -// -// Copyright (c) 2016-2021 Viktor Kirilov -// -// Distributed under the MIT Software License -// See accompanying file LICENSE.txt or copy at -// https://opensource.org/licenses/MIT -// -// The documentation can be found at the library's page: -// https://github.com/doctest/doctest/blob/master/doc/markdown/readme.md -// -// ================================================================================================= -// ================================================================================================= -// ================================================================================================= -// -// The library is heavily influenced by Catch - -// https://github.com/catchorg/Catch2 which uses the Boost Software License - -// Version 1.0 see here - -// https://github.com/catchorg/Catch2/blob/master/LICENSE.txt -// -// The concept of subcases (sections in Catch) and expression decomposition are -// from there. Some parts of the code are taken directly: -// - stringification - the detection of "ostream& operator<<(ostream&, const -// T&)" and StringMaker<> -// - the Approx() helper class for floating point comparison -// - colors in the console -// - breaking into a debugger -// - signal / SEH handling -// - timer -// - XmlWriter class - thanks to Phil Nash for allowing the direct reuse (AKA -// copy/paste) -// -// The expression decomposing templates are taken from lest - -// https://github.com/martinmoene/lest which uses the Boost Software License - -// Version 1.0 see here - -// https://github.com/martinmoene/lest/blob/master/LICENSE.txt -// -// ================================================================================================= -// ================================================================================================= -// ================================================================================================= - -#ifndef DOCTEST_LIBRARY_INCLUDED -#define DOCTEST_LIBRARY_INCLUDED - -// ================================================================================================= -// == VERSION -// ====================================================================================== -// ================================================================================================= - -#define DOCTEST_VERSION_MAJOR 2 -#define DOCTEST_VERSION_MINOR 4 -#define DOCTEST_VERSION_PATCH 8 - -// util we need here -#define DOCTEST_TOSTR_IMPL(x) #x -#define DOCTEST_TOSTR(x) DOCTEST_TOSTR_IMPL(x) - -#define DOCTEST_VERSION_STR \ - DOCTEST_TOSTR(DOCTEST_VERSION_MAJOR) \ - "." DOCTEST_TOSTR(DOCTEST_VERSION_MINOR) "." 
DOCTEST_TOSTR( \ - DOCTEST_VERSION_PATCH) - -#define DOCTEST_VERSION \ - (DOCTEST_VERSION_MAJOR * 10000 + DOCTEST_VERSION_MINOR * 100 + \ - DOCTEST_VERSION_PATCH) - -// ================================================================================================= -// == COMPILER VERSION -// ============================================================================= -// ================================================================================================= - -// ideas for the version stuff are taken from here: -// https://github.com/cxxstuff/cxx_detect - -#define DOCTEST_COMPILER(MAJOR, MINOR, PATCH) \ - ((MAJOR)*10000000 + (MINOR)*100000 + (PATCH)) - -// GCC/Clang and GCC/MSVC are mutually exclusive, but Clang/MSVC are not because -// of clang-cl... -#if defined(_MSC_VER) && defined(_MSC_FULL_VER) -#if _MSC_VER == _MSC_FULL_VER / 10000 -#define DOCTEST_MSVC \ - DOCTEST_COMPILER(_MSC_VER / 100, _MSC_VER % 100, _MSC_FULL_VER % 10000) -#else // MSVC -#define DOCTEST_MSVC \ - DOCTEST_COMPILER( \ - _MSC_VER / 100, (_MSC_FULL_VER / 100000) % 100, _MSC_FULL_VER % 100000) -#endif // MSVC -#endif // MSVC -#if defined(__clang__) && defined(__clang_minor__) -#define DOCTEST_CLANG \ - DOCTEST_COMPILER(__clang_major__, __clang_minor__, __clang_patchlevel__) -#elif defined(__GNUC__) && defined(__GNUC_MINOR__) && \ - defined(__GNUC_PATCHLEVEL__) && !defined(__INTEL_COMPILER) -#define DOCTEST_GCC \ - DOCTEST_COMPILER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#endif // GCC - -#ifndef DOCTEST_MSVC -#define DOCTEST_MSVC 0 -#endif // DOCTEST_MSVC -#ifndef DOCTEST_CLANG -#define DOCTEST_CLANG 0 -#endif // DOCTEST_CLANG -#ifndef DOCTEST_GCC -#define DOCTEST_GCC 0 -#endif // DOCTEST_GCC - -// ================================================================================================= -// == COMPILER WARNINGS HELPERS -// ==================================================================== -// ================================================================================================= - -#if DOCTEST_CLANG -#define DOCTEST_PRAGMA_TO_STR(x) _Pragma(#x) -#define DOCTEST_CLANG_SUPPRESS_WARNING_PUSH _Pragma("clang diagnostic push") -#define DOCTEST_CLANG_SUPPRESS_WARNING(w) \ - DOCTEST_PRAGMA_TO_STR(clang diagnostic ignored w) -#define DOCTEST_CLANG_SUPPRESS_WARNING_POP _Pragma("clang diagnostic pop") -#define DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH(w) \ - DOCTEST_CLANG_SUPPRESS_WARNING_PUSH DOCTEST_CLANG_SUPPRESS_WARNING(w) -#else // DOCTEST_CLANG -#define DOCTEST_CLANG_SUPPRESS_WARNING_PUSH -#define DOCTEST_CLANG_SUPPRESS_WARNING(w) -#define DOCTEST_CLANG_SUPPRESS_WARNING_POP -#define DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH(w) -#endif // DOCTEST_CLANG - -#if DOCTEST_GCC -#define DOCTEST_PRAGMA_TO_STR(x) _Pragma(#x) -#define DOCTEST_GCC_SUPPRESS_WARNING_PUSH _Pragma("GCC diagnostic push") -#define DOCTEST_GCC_SUPPRESS_WARNING(w) \ - DOCTEST_PRAGMA_TO_STR(GCC diagnostic ignored w) -#define DOCTEST_GCC_SUPPRESS_WARNING_POP _Pragma("GCC diagnostic pop") -#define DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH(w) \ - DOCTEST_GCC_SUPPRESS_WARNING_PUSH DOCTEST_GCC_SUPPRESS_WARNING(w) -#else // DOCTEST_GCC -#define DOCTEST_GCC_SUPPRESS_WARNING_PUSH -#define DOCTEST_GCC_SUPPRESS_WARNING(w) -#define DOCTEST_GCC_SUPPRESS_WARNING_POP -#define DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH(w) -#endif // DOCTEST_GCC - -#if DOCTEST_MSVC -#define DOCTEST_MSVC_SUPPRESS_WARNING_PUSH __pragma(warning(push)) -#define DOCTEST_MSVC_SUPPRESS_WARNING(w) __pragma(warning(disable : w)) -#define 
DOCTEST_MSVC_SUPPRESS_WARNING_POP __pragma(warning(pop)) -#define DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(w) \ - DOCTEST_MSVC_SUPPRESS_WARNING_PUSH DOCTEST_MSVC_SUPPRESS_WARNING(w) -#else // DOCTEST_MSVC -#define DOCTEST_MSVC_SUPPRESS_WARNING_PUSH -#define DOCTEST_MSVC_SUPPRESS_WARNING(w) -#define DOCTEST_MSVC_SUPPRESS_WARNING_POP -#define DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(w) -#endif // DOCTEST_MSVC - -// ================================================================================================= -// == COMPILER WARNINGS -// ============================================================================ -// ================================================================================================= - -// both the header and the implementation suppress all of these, -// so it only makes sense to aggregate them like so -#define DOCTEST_SUPPRESS_COMMON_WARNINGS_PUSH \ - DOCTEST_CLANG_SUPPRESS_WARNING_PUSH \ - DOCTEST_CLANG_SUPPRESS_WARNING("-Wunknown-pragmas") \ - DOCTEST_CLANG_SUPPRESS_WARNING("-Wweak-vtables") \ - DOCTEST_CLANG_SUPPRESS_WARNING("-Wpadded") \ - DOCTEST_CLANG_SUPPRESS_WARNING("-Wmissing-prototypes") \ - DOCTEST_CLANG_SUPPRESS_WARNING("-Wunused-local-typedef") \ - DOCTEST_CLANG_SUPPRESS_WARNING("-Wc++98-compat") \ - DOCTEST_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic") \ - \ - DOCTEST_GCC_SUPPRESS_WARNING_PUSH \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wunknown-pragmas") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wpragmas") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Weffc++") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wstrict-overflow") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wstrict-aliasing") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wmissing-declarations") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wunused-local-typedefs") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wuseless-cast") \ - DOCTEST_GCC_SUPPRESS_WARNING("-Wnoexcept") \ - \ - DOCTEST_MSVC_SUPPRESS_WARNING_PUSH \ - /* these 4 also disabled globally via cmake: */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4514) /* unreferenced inline function has been removed */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4571) /* SEH related */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4710) /* function not inlined */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4711) /* function selected for inline expansion*/ \ - /* */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4616) /* invalid compiler warning */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4619) /* invalid compiler warning */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4996) /* The compiler encountered a deprecated declaration */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4706) /* assignment within conditional expression */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4512) /* 'class' : assignment operator could not be generated */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4127) /* conditional expression is constant */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4820) /* padding */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4625) /* copy constructor was implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4626) /* assignment operator was implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 5027) /* move assignment operator implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 5026) /* move constructor was implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4640) /* construction of local static object not thread-safe */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(5045) /* Spectre mitigation for memory load */ \ - /* static analysis */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 26439) /* Function may not throw. 
Declare it 'noexcept' */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 26495) /* Always initialize a member variable */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(26451) /* Arithmetic overflow ... */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 26444) /* Avoid unnamed objects with custom ctor and dtor... */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(26812) /* Prefer 'enum class' over 'enum' */ - -#define DOCTEST_SUPPRESS_COMMON_WARNINGS_POP \ - DOCTEST_CLANG_SUPPRESS_WARNING_POP \ - DOCTEST_GCC_SUPPRESS_WARNING_POP \ - DOCTEST_MSVC_SUPPRESS_WARNING_POP - -DOCTEST_SUPPRESS_COMMON_WARNINGS_PUSH - -DOCTEST_CLANG_SUPPRESS_WARNING_PUSH -DOCTEST_CLANG_SUPPRESS_WARNING("-Wnon-virtual-dtor") -DOCTEST_CLANG_SUPPRESS_WARNING("-Wdeprecated") - -DOCTEST_GCC_SUPPRESS_WARNING_PUSH -DOCTEST_GCC_SUPPRESS_WARNING("-Wctor-dtor-privacy") -DOCTEST_GCC_SUPPRESS_WARNING("-Wnon-virtual-dtor") -DOCTEST_GCC_SUPPRESS_WARNING("-Wsign-promo") - -DOCTEST_MSVC_SUPPRESS_WARNING_PUSH -DOCTEST_MSVC_SUPPRESS_WARNING( - 4623) // default constructor was implicitly defined as deleted - -#define DOCTEST_MAKE_STD_HEADERS_CLEAN_FROM_WARNINGS_ON_WALL_BEGIN \ - DOCTEST_MSVC_SUPPRESS_WARNING_PUSH \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4548) /* before comma no effect; expected side - effect */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4265) /* virtual functions, but destructor is not virtual */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4986) /* exception specification does not match previous */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4350) /* 'member1' called instead of 'member2' */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4668) /* not defined as a preprocessor macro */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4365) /* signed/unsigned mismatch */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4774) /* format string not a string literal */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(4820) /* padding */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4625) /* copy constructor was implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4626) /* assignment operator was implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 5027) /* move assignment operator implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 5026) /* move constructor was implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 4623) /* default constructor was implicitly deleted */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 5039) /* pointer to pot. 
throwing function passed to extern C */ \ - DOCTEST_MSVC_SUPPRESS_WARNING(5045) /* Spectre mitigation for memory load */ \ - DOCTEST_MSVC_SUPPRESS_WARNING( \ - 5105) /* macro producing 'defined' has undefined behavior */ - -#define DOCTEST_MAKE_STD_HEADERS_CLEAN_FROM_WARNINGS_ON_WALL_END \ - DOCTEST_MSVC_SUPPRESS_WARNING_POP - -// ================================================================================================= -// == FEATURE DETECTION -// ============================================================================ -// ================================================================================================= - -// general compiler feature support table: -// https://en.cppreference.com/w/cpp/compiler_support MSVC C++11 feature support -// table: https://msdn.microsoft.com/en-us/library/hh567368.aspx GCC C++11 -// feature support table: https://gcc.gnu.org/projects/cxx-status.html MSVC -// version table: -// https://en.wikipedia.org/wiki/Microsoft_Visual_C%2B%2B#Internal_version_numbering -// MSVC++ 14.3 (17) _MSC_VER == 1930 (Visual Studio 2022) -// MSVC++ 14.2 (16) _MSC_VER == 1920 (Visual Studio 2019) -// MSVC++ 14.1 (15) _MSC_VER == 1910 (Visual Studio 2017) -// MSVC++ 14.0 _MSC_VER == 1900 (Visual Studio 2015) -// MSVC++ 12.0 _MSC_VER == 1800 (Visual Studio 2013) -// MSVC++ 11.0 _MSC_VER == 1700 (Visual Studio 2012) -// MSVC++ 10.0 _MSC_VER == 1600 (Visual Studio 2010) -// MSVC++ 9.0 _MSC_VER == 1500 (Visual Studio 2008) -// MSVC++ 8.0 _MSC_VER == 1400 (Visual Studio 2005) - -// Universal Windows Platform support -#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) -#define DOCTEST_CONFIG_NO_WINDOWS_SEH -#endif // WINAPI_FAMILY -#if DOCTEST_MSVC && !defined(DOCTEST_CONFIG_WINDOWS_SEH) -#define DOCTEST_CONFIG_WINDOWS_SEH -#endif // MSVC -#if defined(DOCTEST_CONFIG_NO_WINDOWS_SEH) && \ - defined(DOCTEST_CONFIG_WINDOWS_SEH) -#undef DOCTEST_CONFIG_WINDOWS_SEH -#endif // DOCTEST_CONFIG_NO_WINDOWS_SEH - -#if !defined(_WIN32) && !defined(__QNX__) && \ - !defined(DOCTEST_CONFIG_POSIX_SIGNALS) && !defined(__EMSCRIPTEN__) -#define DOCTEST_CONFIG_POSIX_SIGNALS -#endif // _WIN32 -#if defined(DOCTEST_CONFIG_NO_POSIX_SIGNALS) && \ - defined(DOCTEST_CONFIG_POSIX_SIGNALS) -#undef DOCTEST_CONFIG_POSIX_SIGNALS -#endif // DOCTEST_CONFIG_NO_POSIX_SIGNALS - -#ifndef DOCTEST_CONFIG_NO_EXCEPTIONS -#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) -#define DOCTEST_CONFIG_NO_EXCEPTIONS -#endif // no exceptions -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS - -#ifdef DOCTEST_CONFIG_NO_EXCEPTIONS_BUT_WITH_ALL_ASSERTS -#ifndef DOCTEST_CONFIG_NO_EXCEPTIONS -#define DOCTEST_CONFIG_NO_EXCEPTIONS -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS_BUT_WITH_ALL_ASSERTS - -#if defined(DOCTEST_CONFIG_NO_EXCEPTIONS) && \ - !defined(DOCTEST_CONFIG_NO_TRY_CATCH_IN_ASSERTS) -#define DOCTEST_CONFIG_NO_TRY_CATCH_IN_ASSERTS -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS && - // !DOCTEST_CONFIG_NO_TRY_CATCH_IN_ASSERTS - -#if defined(DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN) && \ - !defined(DOCTEST_CONFIG_IMPLEMENT) -#define DOCTEST_CONFIG_IMPLEMENT -#endif // DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN - -#if defined(_WIN32) || defined(__CYGWIN__) -#if DOCTEST_MSVC -#define DOCTEST_SYMBOL_EXPORT __declspec(dllexport) -#define DOCTEST_SYMBOL_IMPORT __declspec(dllimport) -#else // MSVC -#define DOCTEST_SYMBOL_EXPORT __attribute__((dllexport)) -#define DOCTEST_SYMBOL_IMPORT __attribute__((dllimport)) -#endif // MSVC -#else // _WIN32 -#define DOCTEST_SYMBOL_EXPORT 
__attribute__((visibility("default"))) -#define DOCTEST_SYMBOL_IMPORT -#endif // _WIN32 - -#ifdef DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL -#ifdef DOCTEST_CONFIG_IMPLEMENT -#define DOCTEST_INTERFACE DOCTEST_SYMBOL_EXPORT -#else // DOCTEST_CONFIG_IMPLEMENT -#define DOCTEST_INTERFACE DOCTEST_SYMBOL_IMPORT -#endif // DOCTEST_CONFIG_IMPLEMENT -#else // DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL -#define DOCTEST_INTERFACE -#endif // DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL - -#define DOCTEST_EMPTY - -#if DOCTEST_MSVC -#define DOCTEST_NOINLINE __declspec(noinline) -#define DOCTEST_UNUSED -#define DOCTEST_ALIGNMENT(x) -#elif DOCTEST_CLANG && DOCTEST_CLANG < DOCTEST_COMPILER(3, 5, 0) -#define DOCTEST_NOINLINE -#define DOCTEST_UNUSED -#define DOCTEST_ALIGNMENT(x) -#else -#define DOCTEST_NOINLINE __attribute__((noinline)) -#define DOCTEST_UNUSED __attribute__((unused)) -#define DOCTEST_ALIGNMENT(x) __attribute__((aligned(x))) -#endif - -#ifndef DOCTEST_NORETURN -#if DOCTEST_MSVC && (DOCTEST_MSVC < DOCTEST_COMPILER(19, 0, 0)) -#define DOCTEST_NORETURN -#else // DOCTEST_MSVC -#define DOCTEST_NORETURN [[noreturn]] -#endif // DOCTEST_MSVC -#endif // DOCTEST_NORETURN - -#ifndef DOCTEST_NOEXCEPT -#if DOCTEST_MSVC && (DOCTEST_MSVC < DOCTEST_COMPILER(19, 0, 0)) -#define DOCTEST_NOEXCEPT -#else // DOCTEST_MSVC -#define DOCTEST_NOEXCEPT noexcept -#endif // DOCTEST_MSVC -#endif // DOCTEST_NOEXCEPT - -#ifndef DOCTEST_CONSTEXPR -#if DOCTEST_MSVC && (DOCTEST_MSVC < DOCTEST_COMPILER(19, 0, 0)) -#define DOCTEST_CONSTEXPR const -#else // DOCTEST_MSVC -#define DOCTEST_CONSTEXPR constexpr -#endif // DOCTEST_MSVC -#endif // DOCTEST_CONSTEXPR - -// ================================================================================================= -// == FEATURE DETECTION END -// ======================================================================== -// ================================================================================================= - -// internal macros for string concatenation and anonymous variable name -// generation -#define DOCTEST_CAT_IMPL(s1, s2) s1##s2 -#define DOCTEST_CAT(s1, s2) DOCTEST_CAT_IMPL(s1, s2) -#ifdef __COUNTER__ // not standard and may be missing for some compilers -#define DOCTEST_ANONYMOUS(x) DOCTEST_CAT(x, __COUNTER__) -#else // __COUNTER__ -#define DOCTEST_ANONYMOUS(x) DOCTEST_CAT(x, __LINE__) -#endif // __COUNTER__ - -#ifndef DOCTEST_CONFIG_ASSERTION_PARAMETERS_BY_VALUE -#define DOCTEST_REF_WRAP(x) x& -#else // DOCTEST_CONFIG_ASSERTION_PARAMETERS_BY_VALUE -#define DOCTEST_REF_WRAP(x) x -#endif // DOCTEST_CONFIG_ASSERTION_PARAMETERS_BY_VALUE - -// not using __APPLE__ because... this is how Catch does it -#ifdef __MAC_OS_X_VERSION_MIN_REQUIRED -#define DOCTEST_PLATFORM_MAC -#elif defined(__IPHONE_OS_VERSION_MIN_REQUIRED) -#define DOCTEST_PLATFORM_IPHONE -#elif defined(_WIN32) -#define DOCTEST_PLATFORM_WINDOWS -#else // DOCTEST_PLATFORM -#define DOCTEST_PLATFORM_LINUX -#endif // DOCTEST_PLATFORM - -namespace doctest { namespace detail { -static DOCTEST_CONSTEXPR int -consume(const int*, int) -{ - return 0; -} -}} // namespace doctest::detail - -#define DOCTEST_GLOBAL_NO_WARNINGS(var, ...) 
\ - DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wglobal-constructors") \ - static const int var = doctest::detail::consume(&var, __VA_ARGS__); \ - DOCTEST_CLANG_SUPPRESS_WARNING_POP - -#ifndef DOCTEST_BREAK_INTO_DEBUGGER -// should probably take a look at https://github.com/scottt/debugbreak -#ifdef DOCTEST_PLATFORM_LINUX -#if defined(__GNUC__) && (defined(__i386) || defined(__x86_64)) -// Break at the location of the failing check if possible -#define DOCTEST_BREAK_INTO_DEBUGGER() \ - __asm__("int $3\n" : :) // NOLINT (hicpp-no-assembler) -#else -#include -#define DOCTEST_BREAK_INTO_DEBUGGER() raise(SIGTRAP) -#endif -#elif defined(DOCTEST_PLATFORM_MAC) -#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || \ - defined(__i386) -#define DOCTEST_BREAK_INTO_DEBUGGER() \ - __asm__("int $3\n" : :) // NOLINT (hicpp-no-assembler) -#else -#define DOCTEST_BREAK_INTO_DEBUGGER() \ - __asm__("brk #0"); // NOLINT (hicpp-no-assembler) -#endif -#elif DOCTEST_MSVC -#define DOCTEST_BREAK_INTO_DEBUGGER() __debugbreak() -#elif defined(__MINGW32__) -DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wredundant-decls") -extern "C" __declspec(dllimport) void __stdcall DebugBreak(); -DOCTEST_GCC_SUPPRESS_WARNING_POP -#define DOCTEST_BREAK_INTO_DEBUGGER() ::DebugBreak() -#else // linux -#define DOCTEST_BREAK_INTO_DEBUGGER() (static_cast(0)) -#endif // linux -#endif // DOCTEST_BREAK_INTO_DEBUGGER - -// this is kept here for backwards compatibility since the config option was -// changed -#ifdef DOCTEST_CONFIG_USE_IOSFWD -#define DOCTEST_CONFIG_USE_STD_HEADERS -#endif // DOCTEST_CONFIG_USE_IOSFWD - -// for clang - always include ciso646 (which drags some std stuff) because -// we want to check if we are using libc++ with the _LIBCPP_VERSION macro in -// which case we don't want to forward declare stuff from std - for reference: -// https://github.com/doctest/doctest/issues/126 -// https://github.com/doctest/doctest/issues/356 -#if DOCTEST_CLANG -#include -#ifdef _LIBCPP_VERSION -#define DOCTEST_CONFIG_USE_STD_HEADERS -#endif // _LIBCPP_VERSION -#endif // clang - -#ifdef DOCTEST_CONFIG_USE_STD_HEADERS -#ifndef DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS -#define DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS -#endif // DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS -#include -#include -#include -#else // DOCTEST_CONFIG_USE_STD_HEADERS - -// Forward declaring 'X' in namespace std is not permitted by the C++ Standard. 
-DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(4643) - -namespace std { // NOLINT (cert-dcl58-cpp) -typedef decltype(nullptr) nullptr_t; -template -struct char_traits; -template <> -struct char_traits; -template -class basic_ostream; -typedef basic_ostream> ostream; -template -class basic_istream; -typedef basic_istream> istream; -template -class tuple; -#if DOCTEST_MSVC >= DOCTEST_COMPILER(19, 20, 0) -// see this issue on why this is needed: -// https://github.com/doctest/doctest/issues/183 -template -class allocator; -template -class basic_string; -using string = basic_string, allocator>; -#endif // VS 2019 -} // namespace std - -DOCTEST_MSVC_SUPPRESS_WARNING_POP - -#endif // DOCTEST_CONFIG_USE_STD_HEADERS - -#ifdef DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS -#include -#endif // DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - -namespace doctest { - -DOCTEST_INTERFACE extern bool is_running_in_test; - -// A 24 byte string class (can be as small as 17 for x64 and 13 for x86) that -// can hold strings with length of up to 23 chars on the stack before going on -// the heap - the last byte of the buffer is used for: -// - "is small" bit - the highest bit - if "0" then it is small - otherwise its -// "1" (128) -// - if small - capacity left before going on the heap - using the lowest 5 bits -// - if small - 2 bits are left unused - the second and third highest ones -// - if small - acts as a null terminator if strlen() is 23 (24 including the -// null terminator) -// and the "is small" bit remains "0" ("as well as the capacity -// left") so its OK -// Idea taken from this lecture about the string implementation of -// facebook/folly - fbstring https://www.youtube.com/watch?v=kPR8h4-qZdk -// TODO: -// - optimizations - like not deleting memory unnecessarily in operator= and -// etc. -// - resize/reserve/clear -// - substr -// - replace -// - back/front -// - iterator stuff -// - find & friends -// - push_back/pop_back -// - assign/insert/erase -// - relational operators as free functions - taking const char* as one of the -// params -class DOCTEST_INTERFACE String { - static const unsigned len = 24; //! OCLINT avoid private static members - static const unsigned last = len - 1; //! 
OCLINT avoid private static members - - struct view // len should be more than sizeof(view) - because of the final - // byte for flags - { - char* ptr; - unsigned size; - unsigned capacity; - }; - - union { - char buf[len]; - view data; - }; - - char* allocate(unsigned sz); - - bool isOnStack() const { return (buf[last] & 128) == 0; } - void setOnHeap(); - void setLast(unsigned in = last); - - void copy(const String& other); - - public: - String(); - ~String(); - - // cppcheck-suppress noExplicitConstructor - String(const char* in); - String(const char* in, unsigned in_size); - - String(std::istream& in, unsigned in_size); - - String(const String& other); - String& operator=(const String& other); - - String& operator+=(const String& other); - - String(String&& other); - String& operator=(String&& other); - - char operator[](unsigned i) const; - char& operator[](unsigned i); - - // the only functions I'm willing to leave in the interface - available for - // inlining - const char* c_str() const - { - return const_cast(this)->c_str(); - } // NOLINT - char* c_str() - { - if (isOnStack()) - return reinterpret_cast(buf); - return data.ptr; - } - - unsigned size() const; - unsigned capacity() const; - - int compare(const char* other, bool no_case = false) const; - int compare(const String& other, bool no_case = false) const; -}; - -DOCTEST_INTERFACE String operator+(const String& lhs, const String& rhs); - -DOCTEST_INTERFACE bool operator==(const String& lhs, const String& rhs); -DOCTEST_INTERFACE bool operator!=(const String& lhs, const String& rhs); -DOCTEST_INTERFACE bool operator<(const String& lhs, const String& rhs); -DOCTEST_INTERFACE bool operator>(const String& lhs, const String& rhs); -DOCTEST_INTERFACE bool operator<=(const String& lhs, const String& rhs); -DOCTEST_INTERFACE bool operator>=(const String& lhs, const String& rhs); - -DOCTEST_INTERFACE std::ostream& operator<<(std::ostream& s, const String& in); - -namespace Color { -enum Enum { - None = 0, - White, - Red, - Green, - Blue, - Cyan, - Yellow, - Grey, - - Bright = 0x10, - - BrightRed = Bright | Red, - BrightGreen = Bright | Green, - LightGrey = Bright | Grey, - BrightWhite = Bright | White -}; - -DOCTEST_INTERFACE std::ostream& operator<<(std::ostream& s, Color::Enum code); -} // namespace Color - -namespace assertType { -enum Enum { - // macro traits - - is_warn = 1, - is_check = 2 * is_warn, - is_require = 2 * is_check, - - is_normal = 2 * is_require, - is_throws = 2 * is_normal, - is_throws_as = 2 * is_throws, - is_throws_with = 2 * is_throws_as, - is_nothrow = 2 * is_throws_with, - - is_false = 2 * is_nothrow, - is_unary = - 2 * - is_false, // not checked anywhere - used just to distinguish the types - - is_eq = 2 * is_unary, - is_ne = 2 * is_eq, - - is_lt = 2 * is_ne, - is_gt = 2 * is_lt, - - is_ge = 2 * is_gt, - is_le = 2 * is_ge, - - // macro types - - DT_WARN = is_normal | is_warn, - DT_CHECK = is_normal | is_check, - DT_REQUIRE = is_normal | is_require, - - DT_WARN_FALSE = is_normal | is_false | is_warn, - DT_CHECK_FALSE = is_normal | is_false | is_check, - DT_REQUIRE_FALSE = is_normal | is_false | is_require, - - DT_WARN_THROWS = is_throws | is_warn, - DT_CHECK_THROWS = is_throws | is_check, - DT_REQUIRE_THROWS = is_throws | is_require, - - DT_WARN_THROWS_AS = is_throws_as | is_warn, - DT_CHECK_THROWS_AS = is_throws_as | is_check, - DT_REQUIRE_THROWS_AS = is_throws_as | is_require, - - DT_WARN_THROWS_WITH = is_throws_with | is_warn, - DT_CHECK_THROWS_WITH = is_throws_with | is_check, - DT_REQUIRE_THROWS_WITH = 
is_throws_with | is_require, - - DT_WARN_THROWS_WITH_AS = is_throws_with | is_throws_as | is_warn, - DT_CHECK_THROWS_WITH_AS = is_throws_with | is_throws_as | is_check, - DT_REQUIRE_THROWS_WITH_AS = is_throws_with | is_throws_as | is_require, - - DT_WARN_NOTHROW = is_nothrow | is_warn, - DT_CHECK_NOTHROW = is_nothrow | is_check, - DT_REQUIRE_NOTHROW = is_nothrow | is_require, - - DT_WARN_EQ = is_normal | is_eq | is_warn, - DT_CHECK_EQ = is_normal | is_eq | is_check, - DT_REQUIRE_EQ = is_normal | is_eq | is_require, - - DT_WARN_NE = is_normal | is_ne | is_warn, - DT_CHECK_NE = is_normal | is_ne | is_check, - DT_REQUIRE_NE = is_normal | is_ne | is_require, - - DT_WARN_GT = is_normal | is_gt | is_warn, - DT_CHECK_GT = is_normal | is_gt | is_check, - DT_REQUIRE_GT = is_normal | is_gt | is_require, - - DT_WARN_LT = is_normal | is_lt | is_warn, - DT_CHECK_LT = is_normal | is_lt | is_check, - DT_REQUIRE_LT = is_normal | is_lt | is_require, - - DT_WARN_GE = is_normal | is_ge | is_warn, - DT_CHECK_GE = is_normal | is_ge | is_check, - DT_REQUIRE_GE = is_normal | is_ge | is_require, - - DT_WARN_LE = is_normal | is_le | is_warn, - DT_CHECK_LE = is_normal | is_le | is_check, - DT_REQUIRE_LE = is_normal | is_le | is_require, - - DT_WARN_UNARY = is_normal | is_unary | is_warn, - DT_CHECK_UNARY = is_normal | is_unary | is_check, - DT_REQUIRE_UNARY = is_normal | is_unary | is_require, - - DT_WARN_UNARY_FALSE = is_normal | is_false | is_unary | is_warn, - DT_CHECK_UNARY_FALSE = is_normal | is_false | is_unary | is_check, - DT_REQUIRE_UNARY_FALSE = is_normal | is_false | is_unary | is_require, -}; -} // namespace assertType - -DOCTEST_INTERFACE const char* assertString(assertType::Enum at); -DOCTEST_INTERFACE const char* failureString(assertType::Enum at); -DOCTEST_INTERFACE const char* skipPathFromFilename(const char* file); - -struct DOCTEST_INTERFACE TestCaseData { - String m_file; // the file in which the test was registered (using String - - // see #350) - unsigned m_line; // the line where the test was registered - const char* m_name; // name of the test case - const char* m_test_suite; // the test suite in which the test was added - const char* m_description; - bool m_skip; - bool m_no_breaks; - bool m_no_output; - bool m_may_fail; - bool m_should_fail; - int m_expected_failures; - double m_timeout; -}; - -struct DOCTEST_INTERFACE AssertData { - // common - for all asserts - const TestCaseData* m_test_case; - assertType::Enum m_at; - const char* m_file; - int m_line; - const char* m_expr; - bool m_failed; - - // exception-related - for all asserts - bool m_threw; - String m_exception; - - // for normal asserts - String m_decomp; - - // for specific exception-related asserts - bool m_threw_as; - const char* m_exception_type; - const char* m_exception_string; -}; - -struct DOCTEST_INTERFACE MessageData { - String m_string; - const char* m_file; - int m_line; - assertType::Enum m_severity; -}; - -struct DOCTEST_INTERFACE SubcaseSignature { - String m_name; - const char* m_file; - int m_line; - - bool operator<(const SubcaseSignature& other) const; -}; - -struct DOCTEST_INTERFACE IContextScope { - IContextScope(); - virtual ~IContextScope(); - virtual void stringify(std::ostream*) const = 0; -}; - -namespace detail { -struct DOCTEST_INTERFACE TestCase; -} // namespace detail - -struct ContextOptions //! 
OCLINT too many fields -{ - std::ostream* cout = nullptr; // stdout stream - String binary_name; // the test binary name - - const detail::TestCase* currentTest = nullptr; - - // == parameters from the command line - String out; // output filename - String order_by; // how tests should be ordered - unsigned rand_seed; // the seed for rand ordering - - unsigned first; // the first (matching) test to be executed - unsigned last; // the last (matching) test to be executed - - int abort_after; // stop tests after this many failed assertions - int subcase_filter_levels; // apply the subcase filters for the first N - // levels - - bool success; // include successful assertions in output - bool case_sensitive; // if filtering should be case sensitive - bool - exit; // if the program should be exited after the tests are ran/whatever - bool duration; // print the time duration of each test case - bool minimal; // minimal console output (only test failures) - bool quiet; // no console output - bool no_throw; // to skip exceptions-related assertion macros - bool no_exitcode; // if the framework should return 0 as the exitcode - bool no_run; // to not run the tests at all (can be done with an "*" exclude) - bool no_intro; // to not print the intro of the framework - bool no_version; // to not print the version of the framework - bool no_colors; // if output to the console should be colorized - bool force_colors; // forces the use of colors even when a tty cannot be - // detected - bool no_breaks; // to not break into the debugger - bool no_skip; // don't skip test cases which are marked to be skipped - bool gnu_file_line; // if line numbers should be surrounded with :x: and not - // (x): - bool no_path_in_filenames; // if the path to files should be removed from the - // output - bool no_line_numbers; // if source code line numbers should be omitted from - // the output - bool no_debug_output; // no output in the debug console when a debugger is - // attached - bool no_skipped_summary; // don't print "skipped" in the summary !!! - // UNDOCUMENTED !!! - bool no_time_in_output; // omit any time/timestamps from output !!! - // UNDOCUMENTED !!! 
- - bool help; // to print the help - bool version; // to print the version - bool count; // if only the count of matching tests is to be retrieved - bool list_test_cases; // to list all tests matching the filters - bool list_test_suites; // to list all suites matching the filters - bool list_reporters; // lists all registered reporters -}; - -namespace detail { -template -struct enable_if {}; - -template -struct enable_if { - typedef TYPE type; -}; - -// clang-format off - template struct remove_reference { typedef T type; }; - template struct remove_reference { typedef T type; }; - template struct remove_reference { typedef T type; }; - - template U declval(int); - - template T declval(long); - - template auto declval() DOCTEST_NOEXCEPT -> decltype(declval(0)) ; - - template struct is_lvalue_reference { const static bool value=false; }; - template struct is_lvalue_reference { const static bool value=true; }; - - template struct is_rvalue_reference { const static bool value=false; }; - template struct is_rvalue_reference { const static bool value=true; }; - - template - inline T&& forward(typename remove_reference::type& t) DOCTEST_NOEXCEPT - { - return static_cast(t); - } - - template - inline T&& forward(typename remove_reference::type&& t) DOCTEST_NOEXCEPT - { - static_assert(!is_lvalue_reference::value, - "Can not forward an rvalue as an lvalue."); - return static_cast(t); - } - - template struct remove_const { typedef T type; }; - template struct remove_const { typedef T type; }; -#ifdef DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - template struct is_enum : public std::is_enum {}; - template struct underlying_type : public std::underlying_type {}; -#else - // Use compiler intrinsics - template struct is_enum { DOCTEST_CONSTEXPR static bool value = __is_enum(T); }; - template struct underlying_type { typedef __underlying_type(T) type; }; -#endif -// clang-format on - -template -struct deferred_false -// cppcheck-suppress unusedStructMember -{ - static const bool value = false; -}; - -namespace has_insertion_operator_impl { -std::ostream& os(); -template -DOCTEST_REF_WRAP(T) -val(); - -template -struct check { - static DOCTEST_CONSTEXPR bool value = false; -}; - -template -struct check(), void())> { - static DOCTEST_CONSTEXPR bool value = true; -}; -} // namespace has_insertion_operator_impl - -template -using has_insertion_operator = has_insertion_operator_impl::check; - -DOCTEST_INTERFACE std::ostream* tlssPush(); -DOCTEST_INTERFACE String tlssPop(); - - -template -struct StringMakerBase { - template - static String convert(const DOCTEST_REF_WRAP(T)) - { - return "{?}"; - } -}; - -// Vector and various type other than pointer or array. -template -struct filldata { - static void fill(std::ostream* stream, const T& in) { *stream << in; } -}; - -template -struct filldata { - static void fill(std::ostream* stream, const T (&in)[N]) - { - for (unsigned long i = 0; i < N; i++) { - *stream << in[i]; - } - } -}; - -// Specialized since we don't want the terminating null byte! -template -struct filldata { - static void fill(std::ostream* stream, const char (&in)[N]) { *stream << in; } -}; - -template -void -filloss(std::ostream* stream, const T& in) -{ - filldata::fill(stream, in); -} - -template -void -filloss(std::ostream* stream, const T (&in)[N]) -{ - // T[N], T(&)[N], T(&&)[N] have same behaviour. - // Hence remove reference. 
- filldata::type>::fill(stream, in); -} - -template <> -struct StringMakerBase { - template - static String convert(const DOCTEST_REF_WRAP(T) in) - { - /* When parameter "in" is a null terminated const char* it works. - * When parameter "in" is a T arr[N] without '\0' we can fill the - * stringstream with N objects (T=char).If in is char pointer * - * without '\0' , it would cause segfault - * stepping over unaccessible memory. - */ - - std::ostream* stream = tlssPush(); - filloss(stream, in); - return tlssPop(); - } -}; - -DOCTEST_INTERFACE String rawMemoryToString(const void* object, unsigned size); - -template -String -rawMemoryToString(const DOCTEST_REF_WRAP(T) object) -{ - return rawMemoryToString(&object, sizeof(object)); -} - -template -const char* -type_to_string() -{ - return "<>"; -} -} // namespace detail - -template -struct StringMaker - : public detail::StringMakerBase::value> { -}; - -template -struct StringMaker { - template - static String convert(U* p) - { - if (p) - return detail::rawMemoryToString(p); - return "NULL"; - } -}; - -template -struct StringMaker { - static String convert(R C::*p) - { - if (p) - return detail::rawMemoryToString(p); - return "NULL"; - } -}; - -template < - typename T, - typename detail::enable_if::value, bool>::type = true> -String -toString(const DOCTEST_REF_WRAP(T) value) -{ - return StringMaker::convert(value); -} - -#ifdef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -DOCTEST_INTERFACE String toString(char* in); -DOCTEST_INTERFACE String toString(const char* in); -#endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -DOCTEST_INTERFACE String toString(bool in); -DOCTEST_INTERFACE String toString(float in); -DOCTEST_INTERFACE String toString(double in); -DOCTEST_INTERFACE String toString(double long in); - -DOCTEST_INTERFACE String toString(char in); -DOCTEST_INTERFACE String toString(char signed in); -DOCTEST_INTERFACE String toString(char unsigned in); -DOCTEST_INTERFACE String toString(int short in); -DOCTEST_INTERFACE String toString(int short unsigned in); -DOCTEST_INTERFACE String toString(int in); -DOCTEST_INTERFACE String toString(int unsigned in); -DOCTEST_INTERFACE String toString(int long in); -DOCTEST_INTERFACE String toString(int long unsigned in); -DOCTEST_INTERFACE String toString(int long long in); -DOCTEST_INTERFACE String toString(int long long unsigned in); -DOCTEST_INTERFACE String toString(std::nullptr_t in); - -template < - typename T, - typename detail::enable_if::value, bool>::type = true> -String -toString(const DOCTEST_REF_WRAP(T) value) -{ - typedef typename detail::underlying_type::type UT; - return toString(static_cast(value)); -} - -#if DOCTEST_MSVC >= DOCTEST_COMPILER(19, 20, 0) -// see this issue on why this is needed: -// https://github.com/doctest/doctest/issues/183 -DOCTEST_INTERFACE String toString(const std::string& in); -#endif // VS 2019 - -class DOCTEST_INTERFACE Approx { - public: - explicit Approx(double value); - - Approx operator()(double value) const; - -#ifdef DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - template - explicit Approx( - const T& value, typename detail::enable_if< - std::is_constructible::value>::type* = - static_cast(nullptr)) - { - *this = Approx(static_cast(value)); - } -#endif // DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - - Approx& epsilon(double newEpsilon); - -#ifdef DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - template - typename detail::enable_if< - std::is_constructible::value, Approx&>::type - epsilon(const T& newEpsilon) - { - m_epsilon = static_cast(newEpsilon); - return *this; - } -#endif // 
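// A minimal sketch of how the StringMaker / toString machinery declared above is
// normally reached from user code: a type becomes printable in assertion output
// either through an ostream operator<< or through a StringMaker specialization.
// "Point" is a hypothetical type used only for illustration; <ostream> and the
// doctest header are assumed to be included.
struct Point { int x, y; };

std::ostream& operator<<(std::ostream& os, const Point& p)
{
    return os << "(" << p.x << ", " << p.y << ")";
}

// Alternatively, instead of operator<<:
namespace doctest {
template <>
struct StringMaker<Point> {
    static String convert(const Point& p)
    {
        return toString(p.x) + ", " + toString(p.y);
    }
};
} // namespace doctest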
DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - - Approx& scale(double newScale); - -#ifdef DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - template - typename detail::enable_if< - std::is_constructible::value, Approx&>::type - scale(const T& newScale) - { - m_scale = static_cast(newScale); - return *this; - } -#endif // DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - - // clang-format off - DOCTEST_INTERFACE friend bool operator==(double lhs, const Approx & rhs); - DOCTEST_INTERFACE friend bool operator==(const Approx & lhs, double rhs); - DOCTEST_INTERFACE friend bool operator!=(double lhs, const Approx & rhs); - DOCTEST_INTERFACE friend bool operator!=(const Approx & lhs, double rhs); - DOCTEST_INTERFACE friend bool operator<=(double lhs, const Approx & rhs); - DOCTEST_INTERFACE friend bool operator<=(const Approx & lhs, double rhs); - DOCTEST_INTERFACE friend bool operator>=(double lhs, const Approx & rhs); - DOCTEST_INTERFACE friend bool operator>=(const Approx & lhs, double rhs); - DOCTEST_INTERFACE friend bool operator< (double lhs, const Approx & rhs); - DOCTEST_INTERFACE friend bool operator< (const Approx & lhs, double rhs); - DOCTEST_INTERFACE friend bool operator> (double lhs, const Approx & rhs); - DOCTEST_INTERFACE friend bool operator> (const Approx & lhs, double rhs); - - DOCTEST_INTERFACE friend String toString(const Approx& in); - -#ifdef DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS -#define DOCTEST_APPROX_PREFIX \ - template friend typename detail::enable_if::value, bool>::type - - DOCTEST_APPROX_PREFIX operator==(const T& lhs, const Approx& rhs) { return operator==(double(lhs), rhs); } - DOCTEST_APPROX_PREFIX operator==(const Approx& lhs, const T& rhs) { return operator==(rhs, lhs); } - DOCTEST_APPROX_PREFIX operator!=(const T& lhs, const Approx& rhs) { return !operator==(lhs, rhs); } - DOCTEST_APPROX_PREFIX operator!=(const Approx& lhs, const T& rhs) { return !operator==(rhs, lhs); } - DOCTEST_APPROX_PREFIX operator<=(const T& lhs, const Approx& rhs) { return double(lhs) < rhs.m_value || lhs == rhs; } - DOCTEST_APPROX_PREFIX operator<=(const Approx& lhs, const T& rhs) { return lhs.m_value < double(rhs) || lhs == rhs; } - DOCTEST_APPROX_PREFIX operator>=(const T& lhs, const Approx& rhs) { return double(lhs) > rhs.m_value || lhs == rhs; } - DOCTEST_APPROX_PREFIX operator>=(const Approx& lhs, const T& rhs) { return lhs.m_value > double(rhs) || lhs == rhs; } - DOCTEST_APPROX_PREFIX operator< (const T& lhs, const Approx& rhs) { return double(lhs) < rhs.m_value && lhs != rhs; } - DOCTEST_APPROX_PREFIX operator< (const Approx& lhs, const T& rhs) { return lhs.m_value < double(rhs) && lhs != rhs; } - DOCTEST_APPROX_PREFIX operator> (const T& lhs, const Approx& rhs) { return double(lhs) > rhs.m_value && lhs != rhs; } - DOCTEST_APPROX_PREFIX operator> (const Approx& lhs, const T& rhs) { return lhs.m_value > double(rhs) && lhs != rhs; } -#undef DOCTEST_APPROX_PREFIX -#endif // DOCTEST_CONFIG_INCLUDE_TYPE_TRAITS - - // clang-format on - - private: - double m_epsilon; - double m_scale; - double m_value; -}; - -DOCTEST_INTERFACE String toString(const Approx& in); - -DOCTEST_INTERFACE const ContextOptions* getContextOptions(); - -#if !defined(DOCTEST_CONFIG_DISABLE) - -namespace detail { -// clang-format off -#ifdef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING - template struct decay_array { typedef T type; }; - template struct decay_array { typedef T* type; }; - template struct decay_array { typedef T* type; }; - - template struct not_char_pointer { enum { value = 1 }; }; - template<> struct not_char_pointer { enum { value = 0 }; 
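// A minimal usage sketch of the Approx helper declared above, assuming the
// default short macro names (TEST_CASE, CHECK) are enabled. epsilon() widens
// the allowed relative error; scale() is also available for values near zero.
TEST_CASE("approximate floating point comparison")
{
    double third = 1.0 / 3.0;
    CHECK(third == doctest::Approx(0.333).epsilon(0.01)); // within 1%
    CHECK(third * 3.0 == doctest::Approx(1.0));           // default tolerance
}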
}; - template<> struct not_char_pointer { enum { value = 0 }; }; - - template struct can_use_op : public not_char_pointer::type> {}; -#endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -// clang-format on - -struct DOCTEST_INTERFACE TestFailureException {}; - -DOCTEST_INTERFACE bool checkIfShouldThrow(assertType::Enum at); - -#ifndef DOCTEST_CONFIG_NO_EXCEPTIONS -DOCTEST_NORETURN -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS -DOCTEST_INTERFACE void throwException(); - -struct DOCTEST_INTERFACE Subcase { - SubcaseSignature m_signature; - bool m_entered = false; - - Subcase(const String& name, const char* file, int line); - ~Subcase(); - - operator bool() const; -}; - -template -String -stringifyBinaryExpr( - const DOCTEST_REF_WRAP(L) lhs, const char* op, - const DOCTEST_REF_WRAP(R) rhs) -{ - // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) - return toString(lhs) + op + toString(rhs); -} - -#if DOCTEST_CLANG && DOCTEST_CLANG < DOCTEST_COMPILER(3, 6, 0) -DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wunused-comparison") -#endif - -// This will check if there is any way it could find a operator like member or -// friend and uses it. If not it doesn't find the operator or if the operator at -// global scope is defined after this template, the template won't be -// instantiated due to SFINAE. Once the template is not instantiated it can look -// for global operator using normal conversions. -#define SFINAE_OP(ret, op) \ - decltype((void)(doctest::detail::declval() op doctest::detail::declval()), ret{}) - -#define DOCTEST_DO_BINARY_EXPRESSION_COMPARISON(op, op_str, op_macro) \ - template \ - DOCTEST_NOINLINE SFINAE_OP(Result, op) operator op(const R&& rhs) \ - { \ - bool res = op_macro( \ - doctest::detail::forward(lhs), \ - doctest::detail::forward(rhs)); \ - if (m_at & assertType::is_false) \ - res = !res; \ - if (!res || doctest::getContextOptions()->success) \ - return Result(res, stringifyBinaryExpr(lhs, op_str, rhs)); \ - return Result(res); \ - } \ - template < \ - typename R, typename enable_if< \ - !doctest::detail::is_rvalue_reference::value, \ - void>::type* = nullptr> \ - DOCTEST_NOINLINE SFINAE_OP(Result, op) operator op(const R& rhs) \ - { \ - bool res = op_macro(doctest::detail::forward(lhs), rhs); \ - if (m_at & assertType::is_false) \ - res = !res; \ - if (!res || doctest::getContextOptions()->success) \ - return Result(res, stringifyBinaryExpr(lhs, op_str, rhs)); \ - return Result(res); \ - } - -// more checks could be added - like in Catch: -// https://github.com/catchorg/Catch2/pull/1480/files -// https://github.com/catchorg/Catch2/pull/1481/files -#define DOCTEST_FORBIT_EXPRESSION(rt, op) \ - template \ - rt& operator op(const R&) \ - { \ - static_assert( \ - deferred_false::value, \ - "Expression Too Complex Please Rewrite As Binary Comparison!"); \ - return *this; \ - } - -struct DOCTEST_INTERFACE Result { - bool m_passed; - String m_decomp; - - Result() = default; - Result(bool passed, const String& decomposition = String()); - - // forbidding some expressions based on this table: - // https://en.cppreference.com/w/cpp/language/operator_precedence - DOCTEST_FORBIT_EXPRESSION(Result, &) - DOCTEST_FORBIT_EXPRESSION(Result, ^) - DOCTEST_FORBIT_EXPRESSION(Result, |) - DOCTEST_FORBIT_EXPRESSION(Result, &&) - DOCTEST_FORBIT_EXPRESSION(Result, ||) - DOCTEST_FORBIT_EXPRESSION(Result, ==) - DOCTEST_FORBIT_EXPRESSION(Result, !=) - DOCTEST_FORBIT_EXPRESSION(Result, <) - DOCTEST_FORBIT_EXPRESSION(Result, >) - DOCTEST_FORBIT_EXPRESSION(Result, <=) - DOCTEST_FORBIT_EXPRESSION(Result, 
>=) - DOCTEST_FORBIT_EXPRESSION(Result, =) - DOCTEST_FORBIT_EXPRESSION(Result, +=) - DOCTEST_FORBIT_EXPRESSION(Result, -=) - DOCTEST_FORBIT_EXPRESSION(Result, *=) - DOCTEST_FORBIT_EXPRESSION(Result, /=) - DOCTEST_FORBIT_EXPRESSION(Result, %=) - DOCTEST_FORBIT_EXPRESSION(Result, <<=) - DOCTEST_FORBIT_EXPRESSION(Result, >>=) - DOCTEST_FORBIT_EXPRESSION(Result, &=) - DOCTEST_FORBIT_EXPRESSION(Result, ^=) - DOCTEST_FORBIT_EXPRESSION(Result, |=) -}; - -#ifndef DOCTEST_CONFIG_NO_COMPARISON_WARNING_SUPPRESSION - -DOCTEST_CLANG_SUPPRESS_WARNING_PUSH -DOCTEST_CLANG_SUPPRESS_WARNING("-Wsign-conversion") -DOCTEST_CLANG_SUPPRESS_WARNING("-Wsign-compare") -// DOCTEST_CLANG_SUPPRESS_WARNING("-Wdouble-promotion") -// DOCTEST_CLANG_SUPPRESS_WARNING("-Wconversion") -// DOCTEST_CLANG_SUPPRESS_WARNING("-Wfloat-equal") - -DOCTEST_GCC_SUPPRESS_WARNING_PUSH -DOCTEST_GCC_SUPPRESS_WARNING("-Wsign-conversion") -DOCTEST_GCC_SUPPRESS_WARNING("-Wsign-compare") -// DOCTEST_GCC_SUPPRESS_WARNING("-Wdouble-promotion") -// DOCTEST_GCC_SUPPRESS_WARNING("-Wconversion") -// DOCTEST_GCC_SUPPRESS_WARNING("-Wfloat-equal") - -DOCTEST_MSVC_SUPPRESS_WARNING_PUSH -// https://stackoverflow.com/questions/39479163 what's the difference between -// 4018 and 4389 -DOCTEST_MSVC_SUPPRESS_WARNING(4388) // signed/unsigned mismatch -DOCTEST_MSVC_SUPPRESS_WARNING(4389) // 'operator' : signed/unsigned mismatch -DOCTEST_MSVC_SUPPRESS_WARNING(4018) // 'expression' : signed/unsigned mismatch -// DOCTEST_MSVC_SUPPRESS_WARNING(4805) // 'operation' : unsafe mix of type -// 'type' and type 'type' in operation - -#endif // DOCTEST_CONFIG_NO_COMPARISON_WARNING_SUPPRESSION - -// clang-format off -#ifndef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -#define DOCTEST_COMPARISON_RETURN_TYPE bool -#else // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -#define DOCTEST_COMPARISON_RETURN_TYPE typename enable_if::value || can_use_op::value, bool>::type - // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) - inline bool eq(const char* lhs, const char* rhs) { return String(lhs) == String(rhs); } - inline bool ne(const char* lhs, const char* rhs) { return String(lhs) != String(rhs); } - inline bool lt(const char* lhs, const char* rhs) { return String(lhs) < String(rhs); } - inline bool gt(const char* lhs, const char* rhs) { return String(lhs) > String(rhs); } - inline bool le(const char* lhs, const char* rhs) { return String(lhs) <= String(rhs); } - inline bool ge(const char* lhs, const char* rhs) { return String(lhs) >= String(rhs); } -#endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -// clang-format on - -#define DOCTEST_RELATIONAL_OP(name, op) \ - template \ - DOCTEST_COMPARISON_RETURN_TYPE name( \ - const DOCTEST_REF_WRAP(L) lhs, const DOCTEST_REF_WRAP(R) rhs) \ - { \ - return lhs op rhs; \ - } - -DOCTEST_RELATIONAL_OP(eq, ==) -DOCTEST_RELATIONAL_OP(ne, !=) -DOCTEST_RELATIONAL_OP(lt, <) -DOCTEST_RELATIONAL_OP(gt, >) -DOCTEST_RELATIONAL_OP(le, <=) -DOCTEST_RELATIONAL_OP(ge, >=) - -#ifndef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -#define DOCTEST_CMP_EQ(l, r) l == r -#define DOCTEST_CMP_NE(l, r) l != r -#define DOCTEST_CMP_GT(l, r) l > r -#define DOCTEST_CMP_LT(l, r) l < r -#define DOCTEST_CMP_GE(l, r) l >= r -#define DOCTEST_CMP_LE(l, r) l <= r -#else // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -#define DOCTEST_CMP_EQ(l, r) eq(l, r) -#define DOCTEST_CMP_NE(l, r) ne(l, r) -#define DOCTEST_CMP_GT(l, r) gt(l, r) -#define DOCTEST_CMP_LT(l, r) lt(l, r) -#define DOCTEST_CMP_GE(l, r) ge(l, r) -#define DOCTEST_CMP_LE(l, r) le(l, r) -#endif // 
DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING - -template -// cppcheck-suppress copyCtorAndEqOperator -struct Expression_lhs { - L lhs; - assertType::Enum m_at; - - explicit Expression_lhs(L&& in, assertType::Enum at) - : lhs(doctest::detail::forward(in)), m_at(at) - { - } - - DOCTEST_NOINLINE operator Result() - { - // this is needed only for MSVC 2015 - DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH( - 4800) // 'int': forcing value to bool - bool res = static_cast(lhs); - DOCTEST_MSVC_SUPPRESS_WARNING_POP - if (m_at & assertType::is_false) //! OCLINT bitwise operator in conditional - res = !res; - - if (!res || getContextOptions()->success) - return Result(res, toString(lhs)); - return Result(res); - } - - /* This is required for user-defined conversions from Expression_lhs to L */ - operator L() const { return lhs; } - - // clang-format off - DOCTEST_DO_BINARY_EXPRESSION_COMPARISON(==, " == ", DOCTEST_CMP_EQ) //!OCLINT bitwise operator in conditional - DOCTEST_DO_BINARY_EXPRESSION_COMPARISON(!=, " != ", DOCTEST_CMP_NE) //!OCLINT bitwise operator in conditional - DOCTEST_DO_BINARY_EXPRESSION_COMPARISON(>, " > ", DOCTEST_CMP_GT) //!OCLINT bitwise operator in conditional - DOCTEST_DO_BINARY_EXPRESSION_COMPARISON(<, " < ", DOCTEST_CMP_LT) //!OCLINT bitwise operator in conditional - DOCTEST_DO_BINARY_EXPRESSION_COMPARISON(>=, " >= ", DOCTEST_CMP_GE) //!OCLINT bitwise operator in conditional - DOCTEST_DO_BINARY_EXPRESSION_COMPARISON(<=, " <= ", DOCTEST_CMP_LE) //!OCLINT bitwise operator in conditional - // clang-format on - - // forbidding some expressions based on this table: - // https://en.cppreference.com/w/cpp/language/operator_precedence - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, &) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, ^) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, |) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, &&) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, ||) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, =) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, +=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, -=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, *=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, /=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, %=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, <<=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, >>=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, &=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, ^=) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, |=) - // these 2 are unfortunate because they should be allowed - they have higher - // precedence over the comparisons, but the ExpressionDecomposer class uses - // the left shift operator to capture the left operand of the binary - // expression... - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, <<) - DOCTEST_FORBIT_EXPRESSION(Expression_lhs, >>) -}; - -#ifndef DOCTEST_CONFIG_NO_COMPARISON_WARNING_SUPPRESSION - -DOCTEST_CLANG_SUPPRESS_WARNING_POP -DOCTEST_MSVC_SUPPRESS_WARNING_POP -DOCTEST_GCC_SUPPRESS_WARNING_POP - -#endif // DOCTEST_CONFIG_NO_COMPARISON_WARNING_SUPPRESSION - -#if DOCTEST_CLANG && DOCTEST_CLANG < DOCTEST_COMPILER(3, 6, 0) -DOCTEST_CLANG_SUPPRESS_WARNING_POP -#endif - -struct DOCTEST_INTERFACE ExpressionDecomposer { - assertType::Enum m_at; - - ExpressionDecomposer(assertType::Enum at); - - // The right operator for capturing expressions is "<=" instead of "<<" (based - // on the operator precedence table) but then there will be warnings from GCC - // about "-Wparentheses" and since "_Pragma()" is problematic this will stay - // for now... 
https://github.com/catchorg/Catch2/issues/870 - // https://github.com/catchorg/Catch2/issues/565 - template - Expression_lhs operator<<(const L&& operand) - { - return Expression_lhs( - doctest::detail::forward(operand), m_at); - } - - template < - typename L, typename enable_if< - !doctest::detail::is_rvalue_reference::value, - void>::type* = nullptr> - Expression_lhs operator<<(const L& operand) - { - return Expression_lhs(operand, m_at); - } -}; - -struct DOCTEST_INTERFACE TestSuite { - const char* m_test_suite = nullptr; - const char* m_description = nullptr; - bool m_skip = false; - bool m_no_breaks = false; - bool m_no_output = false; - bool m_may_fail = false; - bool m_should_fail = false; - int m_expected_failures = 0; - double m_timeout = 0; - - TestSuite& operator*(const char* in); - - template - TestSuite& operator*(const T& in) - { - in.fill(*this); - return *this; - } -}; - -typedef void (*funcType)(); - -struct DOCTEST_INTERFACE TestCase : public TestCaseData { - funcType m_test; // a function pointer to the test case - - const char* - m_type; // for templated test cases - gets appended to the real name - int m_template_id; // an ID used to distinguish between the different - // versions of a templated test case - String m_full_name; // contains the name (only for templated test cases!) + - // the template type - - TestCase( - funcType test, const char* file, unsigned line, - const TestSuite& test_suite, const char* type = "", int template_id = -1); - - TestCase(const TestCase& other); - - DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH( - 26434) // hides a non-virtual function - TestCase& operator=(const TestCase& other); - DOCTEST_MSVC_SUPPRESS_WARNING_POP - - TestCase& operator*(const char* in); - - template - TestCase& operator*(const T& in) - { - in.fill(*this); - return *this; - } - - bool operator<(const TestCase& other) const; -}; - -// forward declarations of functions used by the macros -DOCTEST_INTERFACE int regTest(const TestCase& tc); -DOCTEST_INTERFACE int setTestSuite(const TestSuite& ts); -DOCTEST_INTERFACE bool isDebuggerActive(); - -template -int -instantiationHelper(const T&) -{ - return 0; -} - -namespace binaryAssertComparison { -enum Enum { eq = 0, ne, gt, lt, ge, le }; -} // namespace binaryAssertComparison - -// clang-format off - template struct RelationalComparator { bool operator()(const DOCTEST_REF_WRAP(L), const DOCTEST_REF_WRAP(R) ) const { return false; } }; - -#define DOCTEST_BINARY_RELATIONAL_OP(n, op) \ - template struct RelationalComparator { bool operator()(const DOCTEST_REF_WRAP(L) lhs, const DOCTEST_REF_WRAP(R) rhs) const { return op(lhs, rhs); } }; -// clang-format on - -DOCTEST_BINARY_RELATIONAL_OP(0, doctest::detail::eq) -DOCTEST_BINARY_RELATIONAL_OP(1, doctest::detail::ne) -DOCTEST_BINARY_RELATIONAL_OP(2, doctest::detail::gt) -DOCTEST_BINARY_RELATIONAL_OP(3, doctest::detail::lt) -DOCTEST_BINARY_RELATIONAL_OP(4, doctest::detail::ge) -DOCTEST_BINARY_RELATIONAL_OP(5, doctest::detail::le) - -struct DOCTEST_INTERFACE ResultBuilder : public AssertData { - ResultBuilder( - assertType::Enum at, const char* file, int line, const char* expr, - const char* exception_type = "", const char* exception_string = ""); - - void setResult(const Result& res); - - template - DOCTEST_NOINLINE bool binary_assert( - const DOCTEST_REF_WRAP(L) lhs, const DOCTEST_REF_WRAP(R) rhs) - { - m_failed = !RelationalComparator()(lhs, rhs); - if (m_failed || getContextOptions()->success) - m_decomp = stringifyBinaryExpr(lhs, ", ", rhs); - return !m_failed; - } - - template - 
DOCTEST_NOINLINE bool unary_assert(const DOCTEST_REF_WRAP(L) val) - { - m_failed = !val; - - if (m_at & assertType::is_false) //! OCLINT bitwise operator in conditional - m_failed = !m_failed; - - if (m_failed || getContextOptions()->success) - m_decomp = toString(val); - - return !m_failed; - } - - void translateException(); - - bool log(); - void react() const; -}; - -namespace assertAction { -enum Enum { nothing = 0, dbgbreak = 1, shouldthrow = 2 }; -} // namespace assertAction - -DOCTEST_INTERFACE void failed_out_of_a_testing_context(const AssertData& ad); - -DOCTEST_INTERFACE bool decomp_assert( - assertType::Enum at, const char* file, int line, const char* expr, - Result result); - -#define DOCTEST_ASSERT_OUT_OF_TESTS(decomp) \ - do { \ - if (!is_running_in_test) { \ - if (failed) { \ - ResultBuilder rb(at, file, line, expr); \ - rb.m_failed = failed; \ - rb.m_decomp = decomp; \ - failed_out_of_a_testing_context(rb); \ - if (isDebuggerActive() && !getContextOptions()->no_breaks) \ - DOCTEST_BREAK_INTO_DEBUGGER(); \ - if (checkIfShouldThrow(at)) \ - throwException(); \ - } \ - return !failed; \ - } \ - } while (false) - -#define DOCTEST_ASSERT_IN_TESTS(decomp) \ - ResultBuilder rb(at, file, line, expr); \ - rb.m_failed = failed; \ - if (rb.m_failed || getContextOptions()->success) \ - rb.m_decomp = decomp; \ - if (rb.log()) \ - DOCTEST_BREAK_INTO_DEBUGGER(); \ - if (rb.m_failed && checkIfShouldThrow(at)) \ - throwException() - -template -DOCTEST_NOINLINE bool -binary_assert( - assertType::Enum at, const char* file, int line, const char* expr, - const DOCTEST_REF_WRAP(L) lhs, const DOCTEST_REF_WRAP(R) rhs) -{ - bool failed = !RelationalComparator()(lhs, rhs); - - // ################################################################################### - // IF THE DEBUGGER BREAKS HERE - GO 1 LEVEL UP IN THE CALLSTACK FOR THE - // FAILING ASSERT THIS IS THE EFFECT OF HAVING - // 'DOCTEST_CONFIG_SUPER_FAST_ASSERTS' DEFINED - // ################################################################################### - DOCTEST_ASSERT_OUT_OF_TESTS(stringifyBinaryExpr(lhs, ", ", rhs)); - DOCTEST_ASSERT_IN_TESTS(stringifyBinaryExpr(lhs, ", ", rhs)); - return !failed; -} - -template -DOCTEST_NOINLINE bool -unary_assert( - assertType::Enum at, const char* file, int line, const char* expr, - const DOCTEST_REF_WRAP(L) val) -{ - bool failed = !val; - - if (at & assertType::is_false) //! OCLINT bitwise operator in conditional - failed = !failed; - - // ################################################################################### - // IF THE DEBUGGER BREAKS HERE - GO 1 LEVEL UP IN THE CALLSTACK FOR THE - // FAILING ASSERT THIS IS THE EFFECT OF HAVING - // 'DOCTEST_CONFIG_SUPER_FAST_ASSERTS' DEFINED - // ################################################################################### - DOCTEST_ASSERT_OUT_OF_TESTS(toString(val)); - DOCTEST_ASSERT_IN_TESTS(toString(val)); - return !failed; -} - -struct DOCTEST_INTERFACE IExceptionTranslator { - IExceptionTranslator(); - virtual ~IExceptionTranslator(); - virtual bool translate(String&) const = 0; -}; - -template -class ExceptionTranslator - : public IExceptionTranslator //! 
OCLINT destructor of virtual class -{ - public: - explicit ExceptionTranslator(String (*translateFunction)(T)) - : m_translateFunction(translateFunction) - { - } - - bool translate(String& res) const override - { -#ifndef DOCTEST_CONFIG_NO_EXCEPTIONS - try { - throw; // lgtm [cpp/rethrow-no-exception] - // cppcheck-suppress catchExceptionByValue - } - catch (T ex) { // NOLINT - res = m_translateFunction(ex); //! OCLINT parameter reassignment - return true; - } - catch (...) { - } //! OCLINT - empty catch statement -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS - static_cast(res); // to silence -Wunused-parameter - return false; - } - - private: - String (*m_translateFunction)(T); -}; - -DOCTEST_INTERFACE void registerExceptionTranslatorImpl( - const IExceptionTranslator* et); - -template -struct StringStreamBase { - template - static void convert(std::ostream* s, const T& in) - { - *s << toString(in); - } - - // always treat char* as a string in this context - no matter - // if DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING is defined - static void convert(std::ostream* s, const char* in) { *s << String(in); } -}; - -template <> -struct StringStreamBase { - template - static void convert(std::ostream* s, const T& in) - { - *s << in; - } -}; - -template -struct StringStream - : public StringStreamBase::value> {}; - -template -void -toStream(std::ostream* s, const T& value) -{ - StringStream::convert(s, value); -} - -#ifdef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -DOCTEST_INTERFACE void toStream(std::ostream* s, char* in); -DOCTEST_INTERFACE void toStream(std::ostream* s, const char* in); -#endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -DOCTEST_INTERFACE void toStream(std::ostream* s, bool in); -DOCTEST_INTERFACE void toStream(std::ostream* s, float in); -DOCTEST_INTERFACE void toStream(std::ostream* s, double in); -DOCTEST_INTERFACE void toStream(std::ostream* s, double long in); - -DOCTEST_INTERFACE void toStream(std::ostream* s, char in); -DOCTEST_INTERFACE void toStream(std::ostream* s, char signed in); -DOCTEST_INTERFACE void toStream(std::ostream* s, char unsigned in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int short in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int short unsigned in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int unsigned in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int long in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int long unsigned in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int long long in); -DOCTEST_INTERFACE void toStream(std::ostream* s, int long long unsigned in); - -// ContextScope base class used to allow implementing methods of ContextScope -// that don't depend on the template parameter in doctest.cpp. 
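// A minimal sketch of how the ExceptionTranslator above is normally reached from
// user code, via the REGISTER_EXCEPTION_TRANSLATOR macro wired up further down
// (short macro names assumed enabled). "MyError" is a hypothetical exception type.
struct MyError { const char* what; };

REGISTER_EXCEPTION_TRANSLATOR(MyError& e)
{
    // called when a MyError escapes a test case, so the report shows this text
    return doctest::String("MyError: ") + e.what;
}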
-class DOCTEST_INTERFACE ContextScopeBase : public IContextScope { - protected: - ContextScopeBase(); - ContextScopeBase(ContextScopeBase&& other); - - void destroy(); - bool need_to_destroy{true}; -}; - -template -class ContextScope : public ContextScopeBase { - const L lambda_; - - public: - explicit ContextScope(const L& lambda) : lambda_(lambda) {} - - ContextScope(ContextScope&& other) - : ContextScopeBase(static_cast(other)), - lambda_(other.lambda_) - { - } - - void stringify(std::ostream* s) const override { lambda_(s); } - - ~ContextScope() override - { - if (need_to_destroy) { - destroy(); - } - } -}; - -struct DOCTEST_INTERFACE MessageBuilder : public MessageData { - std::ostream* m_stream; - bool logged = false; - - MessageBuilder(const char* file, int line, assertType::Enum severity); - MessageBuilder() = delete; - ~MessageBuilder(); - - // the preferred way of chaining parameters for stringification - template - MessageBuilder& operator,(const T& in) - { - toStream(m_stream, in); - return *this; - } - - // kept here just for backwards-compatibility - the comma operator should be - // preferred now - template - MessageBuilder& operator<<(const T& in) - { - return this->operator,(in); - } - - // the `,` operator has the lowest operator precedence - if `<<` is used by - // the user then the `,` operator will be called last which is not what we - // want and thus the `*` operator is used first (has higher operator - // precedence compared to `<<`) so that we guarantee that an operator of the - // MessageBuilder class is called first before the rest of the parameters - template - MessageBuilder& operator*(const T& in) - { - return this->operator,(in); - } - - bool log(); - void react(); -}; - -template -ContextScope -MakeContextScope(const L& lambda) -{ - return ContextScope(lambda); -} -} // namespace detail - -#define DOCTEST_DEFINE_DECORATOR(name, type, def) \ - struct name { \ - type data; \ - name(type in = def) : data(in) {} \ - void fill(detail::TestCase& state) const \ - { \ - state.DOCTEST_CAT(m_, name) = data; \ - } \ - void fill(detail::TestSuite& state) const \ - { \ - state.DOCTEST_CAT(m_, name) = data; \ - } \ - } - -DOCTEST_DEFINE_DECORATOR(test_suite, const char*, ""); -DOCTEST_DEFINE_DECORATOR(description, const char*, ""); -DOCTEST_DEFINE_DECORATOR(skip, bool, true); -DOCTEST_DEFINE_DECORATOR(no_breaks, bool, true); -DOCTEST_DEFINE_DECORATOR(no_output, bool, true); -DOCTEST_DEFINE_DECORATOR(timeout, double, 0); -DOCTEST_DEFINE_DECORATOR(may_fail, bool, true); -DOCTEST_DEFINE_DECORATOR(should_fail, bool, true); -DOCTEST_DEFINE_DECORATOR(expected_failures, int, 0); - -template -int -registerExceptionTranslator(String (*translateFunction)(T)) -{ - DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wexit-time-destructors") - static detail::ExceptionTranslator exceptionTranslator(translateFunction); - DOCTEST_CLANG_SUPPRESS_WARNING_POP - detail::registerExceptionTranslatorImpl(&exceptionTranslator); - return 0; -} - -} // namespace doctest - -// in a separate namespace outside of doctest because the DOCTEST_TEST_SUITE -// macro introduces an anonymous namespace in which getCurrentTestSuite gets -// overridden -namespace doctest_detail_test_suite_ns { -DOCTEST_INTERFACE doctest::detail::TestSuite& getCurrentTestSuite(); -} // namespace doctest_detail_test_suite_ns - -namespace doctest { -#else // DOCTEST_CONFIG_DISABLE -template -int -registerExceptionTranslator(String (*)(T)) -{ - return 0; -} -#endif // DOCTEST_CONFIG_DISABLE - -namespace detail { -typedef void 
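// A minimal sketch of the decorators defined above, which are attached to test
// cases and suites with operator*. The names and values shown are arbitrary;
// short macro names are assumed to be enabled.
TEST_CASE("retry path"
          * doctest::description("exercises the retry logic")
          * doctest::may_fail()
          * doctest::timeout(0.5))
{
    CHECK(true);
}

TEST_SUITE("integration" * doctest::skip()) // whole suite marked as skipped
{
    TEST_CASE("slow end-to-end test") { CHECK(true); }
}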
(*assert_handler)(const AssertData&); -struct ContextState; -} // namespace detail - -class DOCTEST_INTERFACE Context { - detail::ContextState* p; - - void parseArgs(int argc, const char* const* argv, bool withDefaults = false); - - public: - explicit Context(int argc = 0, const char* const* argv = nullptr); - - ~Context(); - - void applyCommandLine(int argc, const char* const* argv); - - void addFilter(const char* filter, const char* value); - void clearFilters(); - void setOption(const char* option, bool value); - void setOption(const char* option, int value); - void setOption(const char* option, const char* value); - - bool shouldExit(); - - void setAsDefaultForAssertsOutOfTestCases(); - - void setAssertHandler(detail::assert_handler ah); - - void setCout(std::ostream* out); - - int run(); -}; - -namespace TestCaseFailureReason { -enum Enum { - None = 0, - AssertFailure = 1, // an assertion has failed in the test case - Exception = 2, // test case threw an exception - Crash = 4, // a crash... - TooManyFailedAsserts = 8, // the abort-after option - Timeout = 16, // see the timeout decorator - ShouldHaveFailedButDidnt = 32, // see the should_fail decorator - ShouldHaveFailedAndDid = 64, // see the should_fail decorator - DidntFailExactlyNumTimes = 128, // see the expected_failures decorator - FailedExactlyNumTimes = 256, // see the expected_failures decorator - CouldHaveFailedAndDid = 512 // see the may_fail decorator -}; -} // namespace TestCaseFailureReason - -struct DOCTEST_INTERFACE CurrentTestCaseStats { - int numAssertsCurrentTest; - int numAssertsFailedCurrentTest; - double seconds; - int failure_flags; // use TestCaseFailureReason::Enum - bool testCaseSuccess; -}; - -struct DOCTEST_INTERFACE TestCaseException { - String error_string; - bool is_crash; -}; - -struct DOCTEST_INTERFACE TestRunStats { - unsigned numTestCases; - unsigned numTestCasesPassingFilters; - unsigned numTestSuitesPassingFilters; - unsigned numTestCasesFailed; - int numAsserts; - int numAssertsFailed; -}; - -struct QueryData { - const TestRunStats* run_stats = nullptr; - const TestCaseData** data = nullptr; - unsigned num_data = 0; -}; - -struct DOCTEST_INTERFACE IReporter { - // The constructor has to accept "const ContextOptions&" as a single argument - // which has most of the options for the run + a pointer to the stdout stream - // Reporter(const ContextOptions& in) - - // called when a query should be reported (listing test cases, printing the - // version, etc.) 
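// A minimal sketch of driving the framework through the Context class declared
// above from a user-supplied main(); this assumes DOCTEST_CONFIG_IMPLEMENT is
// defined in exactly one translation unit instead of ..._IMPLEMENT_WITH_MAIN.
int main(int argc, char** argv)
{
    doctest::Context ctx;

    ctx.setOption("abort-after", 5);   // default: stop after 5 failed asserts
    ctx.applyCommandLine(argc, argv);  // command line can override the defaults
    ctx.setOption("no-breaks", true);  // override: never break into a debugger

    int res = ctx.run();

    if (ctx.shouldExit())              // an exit or query flag was passed
        return res;

    // ... the rest of the application could run here ...
    return res;
}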
- virtual void report_query(const QueryData&) = 0; - - // called when the whole test run starts - virtual void test_run_start() = 0; - // called when the whole test run ends (caching a pointer to the input doesn't - // make sense here) - virtual void test_run_end(const TestRunStats&) = 0; - - // called when a test case is started (safe to cache a pointer to the input) - virtual void test_case_start(const TestCaseData&) = 0; - // called when a test case is reentered because of unfinished subcases (safe - // to cache a pointer to the input) - virtual void test_case_reenter(const TestCaseData&) = 0; - // called when a test case has ended - virtual void test_case_end(const CurrentTestCaseStats&) = 0; - - // called when an exception is thrown from the test case (or it crashes) - virtual void test_case_exception(const TestCaseException&) = 0; - - // called whenever a subcase is entered (don't cache pointers to the input) - virtual void subcase_start(const SubcaseSignature&) = 0; - // called whenever a subcase is exited (don't cache pointers to the input) - virtual void subcase_end() = 0; - - // called for each assert (don't cache pointers to the input) - virtual void log_assert(const AssertData&) = 0; - // called for each message (don't cache pointers to the input) - virtual void log_message(const MessageData&) = 0; - - // called when a test case is skipped either because it doesn't pass the - // filters, has a skip decorator or isn't in the execution range (between - // first and last) (safe to cache a pointer to the input) - virtual void test_case_skipped(const TestCaseData&) = 0; - - // doctest will not be managing the lifetimes of reporters given to it but - // this would still be nice to have - virtual ~IReporter(); - - // can obtain all currently active contexts and stringify them if one wishes - // to do so - static int get_num_active_contexts(); - static const IContextScope* const* get_active_contexts(); - - // can iterate through contexts which have been stringified automatically in - // their destructors when an exception has been thrown - static int get_num_stringified_contexts(); - static const String* get_stringified_contexts(); -}; - -namespace detail { -typedef IReporter* (*reporterCreatorFunc)(const ContextOptions&); - -DOCTEST_INTERFACE void registerReporterImpl( - const char* name, int prio, reporterCreatorFunc c, bool isReporter); - -template -IReporter* -reporterCreator(const ContextOptions& o) -{ - return new Reporter(o); -} -} // namespace detail - -template -int -registerReporter(const char* name, int priority, bool isReporter) -{ - detail::registerReporterImpl( - name, priority, detail::reporterCreator, isReporter); - return 0; -} -} // namespace doctest - -// if registering is not disabled -#if !defined(DOCTEST_CONFIG_DISABLE) - -// common code in asserts - for convenience -#define DOCTEST_ASSERT_LOG_REACT_RETURN(b) \ - if (b.log()) \ - DOCTEST_BREAK_INTO_DEBUGGER(); \ - b.react(); \ - return !b.m_failed - -#ifdef DOCTEST_CONFIG_NO_TRY_CATCH_IN_ASSERTS -#define DOCTEST_WRAP_IN_TRY(x) x; -#else // DOCTEST_CONFIG_NO_TRY_CATCH_IN_ASSERTS -#define DOCTEST_WRAP_IN_TRY(x) \ - try { \ - x; \ - } \ - catch (...) { \ - DOCTEST_RB.translateException(); \ - } -#endif // DOCTEST_CONFIG_NO_TRY_CATCH_IN_ASSERTS - -#ifdef DOCTEST_CONFIG_VOID_CAST_EXPRESSIONS -#define DOCTEST_CAST_TO_VOID(...) 
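// A minimal sketch of the IReporter interface above: a listener that overrides
// every pure virtual with an empty body and registers itself through
// registerReporter<>() with isReporter=false (the DOCTEST_REGISTER_LISTENER
// macro defined below does the same). "NullListener" is a hypothetical name.
struct NullListener : public doctest::IReporter {
    explicit NullListener(const doctest::ContextOptions&) {}

    void report_query(const doctest::QueryData&) override {}
    void test_run_start() override {}
    void test_run_end(const doctest::TestRunStats&) override {}
    void test_case_start(const doctest::TestCaseData&) override {}
    void test_case_reenter(const doctest::TestCaseData&) override {}
    void test_case_end(const doctest::CurrentTestCaseStats&) override {}
    void test_case_exception(const doctest::TestCaseException&) override {}
    void subcase_start(const doctest::SubcaseSignature&) override {}
    void subcase_end() override {}
    void log_assert(const doctest::AssertData&) override {}
    void log_message(const doctest::MessageData&) override {}
    void test_case_skipped(const doctest::TestCaseData&) override {}
};

static int nullListenerReg =
    doctest::registerReporter<NullListener>("null", 1, /*isReporter=*/false);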
\ - DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wuseless-cast") \ - static_cast(__VA_ARGS__); \ - DOCTEST_GCC_SUPPRESS_WARNING_POP -#else // DOCTEST_CONFIG_VOID_CAST_EXPRESSIONS -#define DOCTEST_CAST_TO_VOID(...) __VA_ARGS__; -#endif // DOCTEST_CONFIG_VOID_CAST_EXPRESSIONS - -// registers the test by initializing a dummy var with a function -#define DOCTEST_REGISTER_FUNCTION(global_prefix, f, decorators) \ - global_prefix DOCTEST_GLOBAL_NO_WARNINGS( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_VAR_), \ - doctest::detail::regTest( \ - doctest::detail::TestCase( \ - f, __FILE__, __LINE__, \ - doctest_detail_test_suite_ns::getCurrentTestSuite()) * \ - decorators)) - -#define DOCTEST_IMPLEMENT_FIXTURE(der, base, func, decorators) \ - namespace { \ - struct der : public base { \ - void f(); \ - }; \ - static void func() \ - { \ - der v; \ - v.f(); \ - } \ - DOCTEST_REGISTER_FUNCTION(DOCTEST_EMPTY, func, decorators) \ - } \ - inline DOCTEST_NOINLINE void der::f() - -#define DOCTEST_CREATE_AND_REGISTER_FUNCTION(f, decorators) \ - static void f(); \ - DOCTEST_REGISTER_FUNCTION(DOCTEST_EMPTY, f, decorators) \ - static void f() - -#define DOCTEST_CREATE_AND_REGISTER_FUNCTION_IN_CLASS(f, proxy, decorators) \ - static doctest::detail::funcType proxy() \ - { \ - return f; \ - } \ - DOCTEST_REGISTER_FUNCTION(inline, proxy(), decorators) \ - static void f() - -// for registering tests -#define DOCTEST_TEST_CASE(decorators) \ - DOCTEST_CREATE_AND_REGISTER_FUNCTION( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_FUNC_), decorators) - -// for registering tests in classes - requires C++17 for inline variables! -#if __cplusplus >= 201703L || \ - (DOCTEST_MSVC >= DOCTEST_COMPILER(19, 12, 0) && _MSVC_LANG >= 201703L) -#define DOCTEST_TEST_CASE_CLASS(decorators) \ - DOCTEST_CREATE_AND_REGISTER_FUNCTION_IN_CLASS( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_FUNC_), \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_PROXY_), decorators) -#else // DOCTEST_TEST_CASE_CLASS -#define DOCTEST_TEST_CASE_CLASS(...) \ - TEST_CASES_CAN_BE_REGISTERED_IN_CLASSES_ONLY_IN_CPP17_MODE_OR_WITH_VS_2017_OR_NEWER -#endif // DOCTEST_TEST_CASE_CLASS - -// for registering tests with a fixture -#define DOCTEST_TEST_CASE_FIXTURE(c, decorators) \ - DOCTEST_IMPLEMENT_FIXTURE( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_CLASS_), c, \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_FUNC_), decorators) - -// for converting types to strings without the header and demangling -#define DOCTEST_TYPE_TO_STRING_IMPL(...) \ - template <> \ - inline const char* type_to_string<__VA_ARGS__>() \ - { \ - return "<" #__VA_ARGS__ ">"; \ - } -#define DOCTEST_TYPE_TO_STRING(...) 
\ - namespace doctest { namespace detail { \ - DOCTEST_TYPE_TO_STRING_IMPL(__VA_ARGS__) \ - } \ - } \ - static_assert(true, "") - -#define DOCTEST_TEST_CASE_TEMPLATE_DEFINE_IMPL(dec, T, iter, func) \ - template \ - static void func(); \ - namespace { \ - template \ - struct iter; \ - template \ - struct iter> { \ - iter(const char* file, unsigned line, int index) \ - { \ - doctest::detail::regTest( \ - doctest::detail::TestCase( \ - func, file, line, \ - doctest_detail_test_suite_ns::getCurrentTestSuite(), \ - doctest::detail::type_to_string(), \ - int(line) * 1000 + index) * \ - dec); \ - iter>(file, line, index + 1); \ - } \ - }; \ - template <> \ - struct iter> { \ - iter(const char*, unsigned, int) {} \ - }; \ - } \ - template \ - static void func() - -#define DOCTEST_TEST_CASE_TEMPLATE_DEFINE(dec, T, id) \ - DOCTEST_TEST_CASE_TEMPLATE_DEFINE_IMPL( \ - dec, T, DOCTEST_CAT(id, ITERATOR), DOCTEST_ANONYMOUS(DOCTEST_ANON_TMP_)) - -#define DOCTEST_TEST_CASE_TEMPLATE_INSTANTIATE_IMPL(id, anon, ...) \ - DOCTEST_GLOBAL_NO_WARNINGS( \ - DOCTEST_CAT(anon, DUMMY), \ - doctest::detail::instantiationHelper( \ - DOCTEST_CAT(id, ITERATOR) < __VA_ARGS__ > (__FILE__, __LINE__, 0))) - -#define DOCTEST_TEST_CASE_TEMPLATE_INVOKE(id, ...) \ - DOCTEST_TEST_CASE_TEMPLATE_INSTANTIATE_IMPL( \ - id, DOCTEST_ANONYMOUS(DOCTEST_ANON_TMP_), std::tuple<__VA_ARGS__>) \ - static_assert(true, "") - -#define DOCTEST_TEST_CASE_TEMPLATE_APPLY(id, ...) \ - DOCTEST_TEST_CASE_TEMPLATE_INSTANTIATE_IMPL( \ - id, DOCTEST_ANONYMOUS(DOCTEST_ANON_TMP_), __VA_ARGS__) \ - static_assert(true, "") - -#define DOCTEST_TEST_CASE_TEMPLATE_IMPL(dec, T, anon, ...) \ - DOCTEST_TEST_CASE_TEMPLATE_DEFINE_IMPL( \ - dec, T, DOCTEST_CAT(anon, ITERATOR), anon); \ - DOCTEST_TEST_CASE_TEMPLATE_INSTANTIATE_IMPL( \ - anon, anon, std::tuple<__VA_ARGS__>) \ - template \ - static void anon() - -#define DOCTEST_TEST_CASE_TEMPLATE(dec, T, ...) 
\ - DOCTEST_TEST_CASE_TEMPLATE_IMPL( \ - dec, T, DOCTEST_ANONYMOUS(DOCTEST_ANON_TMP_), __VA_ARGS__) - -// for subcases -#define DOCTEST_SUBCASE(name) \ - if (const doctest::detail::Subcase & \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_SUBCASE_) DOCTEST_UNUSED = \ - doctest::detail::Subcase(name, __FILE__, __LINE__)) - -// for grouping tests in test suites by using code blocks -#define DOCTEST_TEST_SUITE_IMPL(decorators, ns_name) \ - namespace ns_name { namespace doctest_detail_test_suite_ns { \ - static DOCTEST_NOINLINE doctest::detail::TestSuite& getCurrentTestSuite() \ - { \ - DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(4640) \ - DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wexit-time-destructors") \ - DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wmissing-field-initializers") \ - static doctest::detail::TestSuite data{}; \ - static bool inited = false; \ - DOCTEST_MSVC_SUPPRESS_WARNING_POP \ - DOCTEST_CLANG_SUPPRESS_WARNING_POP \ - DOCTEST_GCC_SUPPRESS_WARNING_POP \ - if (!inited) { \ - data* decorators; \ - inited = true; \ - } \ - return data; \ - } \ - } \ - } \ - namespace ns_name - -#define DOCTEST_TEST_SUITE(decorators) \ - DOCTEST_TEST_SUITE_IMPL(decorators, DOCTEST_ANONYMOUS(DOCTEST_ANON_SUITE_)) - -// for starting a testsuite block -#define DOCTEST_TEST_SUITE_BEGIN(decorators) \ - DOCTEST_GLOBAL_NO_WARNINGS( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_VAR_), \ - doctest::detail::setTestSuite( \ - doctest::detail::TestSuite() * decorators)) \ - static_assert(true, "") - -// for ending a testsuite block -#define DOCTEST_TEST_SUITE_END \ - DOCTEST_GLOBAL_NO_WARNINGS( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_VAR_), \ - doctest::detail::setTestSuite(doctest::detail::TestSuite() * "")) \ - typedef int DOCTEST_ANONYMOUS(DOCTEST_ANON_FOR_SEMICOLON_) - -// for registering exception translators -#define DOCTEST_REGISTER_EXCEPTION_TRANSLATOR_IMPL(translatorName, signature) \ - inline doctest::String translatorName(signature); \ - DOCTEST_GLOBAL_NO_WARNINGS( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_TRANSLATOR_), \ - doctest::registerExceptionTranslator(translatorName)) \ - doctest::String translatorName(signature) - -#define DOCTEST_REGISTER_EXCEPTION_TRANSLATOR(signature) \ - DOCTEST_REGISTER_EXCEPTION_TRANSLATOR_IMPL( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_TRANSLATOR_), signature) - -// for registering reporters -#define DOCTEST_REGISTER_REPORTER(name, priority, reporter) \ - DOCTEST_GLOBAL_NO_WARNINGS( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_REPORTER_), \ - doctest::registerReporter(name, priority, true)) \ - static_assert(true, "") - -// for registering listeners -#define DOCTEST_REGISTER_LISTENER(name, priority, reporter) \ - DOCTEST_GLOBAL_NO_WARNINGS( \ - DOCTEST_ANONYMOUS(DOCTEST_ANON_REPORTER_), \ - doctest::registerReporter(name, priority, false)) \ - static_assert(true, "") - -// clang-format off -// for logging - disabling formatting because it's important to have these on 2 separate lines - see PR #557 -#define DOCTEST_INFO(...) \ - DOCTEST_INFO_IMPL(DOCTEST_ANONYMOUS(DOCTEST_CAPTURE_), \ - DOCTEST_ANONYMOUS(DOCTEST_CAPTURE_OTHER_), \ - __VA_ARGS__) -// clang-format on - -#define DOCTEST_INFO_IMPL(mb_name, s_name, ...) \ - auto DOCTEST_ANONYMOUS(DOCTEST_CAPTURE_) = \ - doctest::detail::MakeContextScope([&](std::ostream* s_name) { \ - doctest::detail::MessageBuilder mb_name( \ - __FILE__, __LINE__, doctest::assertType::is_warn); \ - mb_name.m_stream = s_name; \ - mb_name* __VA_ARGS__; \ - }) - -#define DOCTEST_CAPTURE(x) DOCTEST_INFO(#x " := ", x) - -#define DOCTEST_ADD_AT_IMPL(type, file, line, mb, ...) 
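// A minimal sketch of SUBCASE, INFO and CAPTURE as wired up above. INFO is
// stringified lazily and only reported when an assertion in its scope fails;
// CAPTURE(x) is shorthand for INFO("x := ", x). Assumes <vector> is included
// and short macro names are enabled.
TEST_CASE("vector growth")
{
    std::vector<int> v(5);
    INFO("initial size is ", v.size());

    SUBCASE("push_back grows the vector")
    {
        v.push_back(1);
        CAPTURE(v.size());
        CHECK(v.size() == 6);
    }
    SUBCASE("resize can shrink the vector")
    {
        v.resize(2);
        CHECK(v.size() == 2);
    }
}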
[This portion of the patch deletes, line for line, the bulk of the vendored doctest single-header test framework (doctest.h): the assertion macro implementations (WARN/CHECK/REQUIRE and their _FALSE, _MESSAGE, _THROWS, _THROWS_AS, _THROWS_WITH, and _NOTHROW variants, plus the binary/unary EQ/NE/GT/LT/GE/LE asserts), the no-op stand-ins compiled under DOCTEST_CONFIG_DISABLE and DOCTEST_CONFIG_NO_EXCEPTIONS, the FAST_* backwards-compatibility aliases, the BDD-style SCENARIO/GIVEN/WHEN/THEN macros, the unprefixed short macro names guarded by DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES, and the opening of the implementation section (compiler warning suppressions, platform includes, and the String, Approx, Timer, MultiLaneAtomic, ContextState, and reporter-registration internals). The removed lines are the doctest library code itself, not project-specific tests.]
(*wild == *str) : (tolower(*wild) == tolower(*str))) || - (*wild == '?')) { - wild++; - str++; - } else { - wild = mp; //! OCLINT parameter reassignment - str = cp++; //! OCLINT parameter reassignment - } - } - - while (*wild == '*') { - wild++; - } - return !*wild; -} - -//// C string hash function (djb2) - taken from -/// http://www.cse.yorku.ca/~oz/hash.html -// unsigned hashStr(unsigned const char* str) { -// unsigned long hash = 5381; -// char c; -// while((c = *str++)) -// hash = ((hash << 5) + hash) + c; // hash * 33 + c -// return hash; -//} - -// checks if the name matches any of the filters (and can be configured what to -// do when empty) -bool -matchesAny( - const char* name, const std::vector& filters, bool matchEmpty, - bool caseSensitive) -{ - if (filters.empty() && matchEmpty) - return true; - for (auto& curr : filters) - if (wildcmp(name, curr.c_str(), caseSensitive)) - return true; - return false; -} -} // namespace -namespace detail { - -Subcase::Subcase(const String& name, const char* file, int line) - : m_signature({name, file, line}) -{ - auto* s = g_cs; - - // check subcase filters - if (s->subcasesStack.size() < size_t(s->subcase_filter_levels)) { - if (!matchesAny( - m_signature.m_name.c_str(), s->filters[6], true, s->case_sensitive)) - return; - if (matchesAny( - m_signature.m_name.c_str(), s->filters[7], false, - s->case_sensitive)) - return; - } - - // if a Subcase on the same level has already been entered - if (s->subcasesStack.size() < size_t(s->subcasesCurrentMaxLevel)) { - s->should_reenter = true; - return; - } - - // push the current signature to the stack so we can check if the - // current stack + the current new subcase have been traversed - s->subcasesStack.push_back(m_signature); - if (s->subcasesPassed.count(s->subcasesStack) != 0) { - // pop - revert to previous stack since we've already passed this - s->subcasesStack.pop_back(); - return; - } - - s->subcasesCurrentMaxLevel = s->subcasesStack.size(); - m_entered = true; - - DOCTEST_ITERATE_THROUGH_REPORTERS(subcase_start, m_signature); -} - -DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH( - 4996) // std::uncaught_exception is deprecated in C++17 -DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") -DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") - -Subcase::~Subcase() -{ - if (m_entered) { - // only mark the subcase stack as passed if no subcases have been skipped - if (g_cs->should_reenter == false) - g_cs->subcasesPassed.insert(g_cs->subcasesStack); - g_cs->subcasesStack.pop_back(); - -#if defined(__cpp_lib_uncaught_exceptions) && \ - __cpp_lib_uncaught_exceptions >= 201411L && \ - (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || \ - __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200) - if (std::uncaught_exceptions() > 0 -#else - if (std::uncaught_exception() -#endif - && g_cs->shouldLogCurrentException) { - DOCTEST_ITERATE_THROUGH_REPORTERS( - test_case_exception, - {"exception thrown in subcase - will translate later " - "when the whole test case has been exited (cannot " - "translate while there is an active exception)", - false}); - g_cs->shouldLogCurrentException = false; - } - DOCTEST_ITERATE_THROUGH_REPORTERS(subcase_end, DOCTEST_EMPTY); - } -} - -DOCTEST_CLANG_SUPPRESS_WARNING_POP -DOCTEST_GCC_SUPPRESS_WARNING_POP -DOCTEST_MSVC_SUPPRESS_WARNING_POP - -Subcase::operator bool() const -{ - return m_entered; -} - -Result::Result(bool passed, const String& decomposition) - : m_passed(passed), m_decomp(decomposition) -{ -} - 
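Note on the filter matching removed in the hunks just above: doctest matches test-case and subcase names against glob-style patterns ('*' for any run of characters, '?' for exactly one character, optionally case-insensitive) rather than regular expressions, and an empty filter list can be treated as "match everything" or "match nothing" depending on the call site. The following is a minimal standalone sketch of those semantics only; it is a simplified recursive reimplementation for illustration, not the removed wildcmp/matchesAny code, and the helper names and sample filters are assumptions.

#include <cctype>
#include <iostream>
#include <string>
#include <vector>

// Glob match supporting '*' (any run of characters, possibly empty) and
// '?' (any single character). Simple recursion; not optimized for long patterns.
static bool glob_match(const char* str, const char* pat, bool case_sensitive)
{
    auto eq = [&](char a, char b) {
        return case_sensitive
                   ? a == b
                   : std::tolower(static_cast<unsigned char>(a)) ==
                         std::tolower(static_cast<unsigned char>(b));
    };
    if (*pat == '\0')
        return *str == '\0';
    if (*pat == '*')
        return glob_match(str, pat + 1, case_sensitive) ||
               (*str != '\0' && glob_match(str + 1, pat, case_sensitive));
    return *str != '\0' && (*pat == '?' || eq(*str, *pat)) &&
           glob_match(str + 1, pat + 1, case_sensitive);
}

// "Does the name match any filter?" - an empty filter list is a match only
// when the caller asks for that, mirroring the include/exclude filter split.
static bool matches_any(
    const std::string& name, const std::vector<std::string>& filters,
    bool match_empty, bool case_sensitive)
{
    if (filters.empty() && match_empty)
        return true;
    for (const auto& f : filters)
        if (glob_match(name.c_str(), f.c_str(), case_sensitive))
            return true;
    return false;
}

int main()
{
    // Illustrative filters, not taken from the removed code or its tests.
    std::vector<std::string> filters = {"*perf*", "http_client_?"};
    std::cout << matches_any("perf_analyzer_unit_test", filters, true, false) << "\n";  // prints 1
    std::cout << matches_any("grpc_client", filters, true, false) << "\n";              // prints 0
}

Compiled as a plain C++11 program this prints 1 then 0, showing how a pattern like "*perf*" selects matching test names while leaving others untouched.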
-ExpressionDecomposer::ExpressionDecomposer(assertType::Enum at) : m_at(at) {} - -TestSuite& -TestSuite::operator*(const char* in) -{ - m_test_suite = in; - return *this; -} - -TestCase::TestCase( - funcType test, const char* file, unsigned line, const TestSuite& test_suite, - const char* type, int template_id) -{ - m_file = file; - m_line = line; - m_name = nullptr; // will be later overridden in operator* - m_test_suite = test_suite.m_test_suite; - m_description = test_suite.m_description; - m_skip = test_suite.m_skip; - m_no_breaks = test_suite.m_no_breaks; - m_no_output = test_suite.m_no_output; - m_may_fail = test_suite.m_may_fail; - m_should_fail = test_suite.m_should_fail; - m_expected_failures = test_suite.m_expected_failures; - m_timeout = test_suite.m_timeout; - - m_test = test; - m_type = type; - m_template_id = template_id; -} - -TestCase::TestCase(const TestCase& other) : TestCaseData() -{ - *this = other; -} - -DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(26434) // hides a non-virtual function -DOCTEST_MSVC_SUPPRESS_WARNING(26437) // Do not slice -TestCase& -TestCase::operator=(const TestCase& other) -{ - static_cast(*this) = static_cast(other); - - m_test = other.m_test; - m_type = other.m_type; - m_template_id = other.m_template_id; - m_full_name = other.m_full_name; - - if (m_template_id != -1) - m_name = m_full_name.c_str(); - return *this; -} -DOCTEST_MSVC_SUPPRESS_WARNING_POP - -TestCase& -TestCase::operator*(const char* in) -{ - m_name = in; - // make a new name with an appended type for templated test case - if (m_template_id != -1) { - m_full_name = String(m_name) + m_type; - // redirect the name to point to the newly constructed full name - m_name = m_full_name.c_str(); - } - return *this; -} - -bool -TestCase::operator<(const TestCase& other) const -{ - // this will be used only to differentiate between test cases - not relevant - // for sorting - if (m_line != other.m_line) - return m_line < other.m_line; - const int name_cmp = strcmp(m_name, other.m_name); - if (name_cmp != 0) - return name_cmp < 0; - const int file_cmp = m_file.compare(other.m_file); - if (file_cmp != 0) - return file_cmp < 0; - return m_template_id < other.m_template_id; -} - -// all the registered tests -std::set& -getRegisteredTests() -{ - static std::set data; - return data; -} -} // namespace detail -namespace { -using namespace detail; -// for sorting tests by file/line -bool -fileOrderComparator(const TestCase* lhs, const TestCase* rhs) -{ - // this is needed because MSVC gives different case for drive letters - // for __FILE__ when evaluated in a header and a source file - const int res = lhs->m_file.compare(rhs->m_file, bool(DOCTEST_MSVC)); - if (res != 0) - return res < 0; - if (lhs->m_line != rhs->m_line) - return lhs->m_line < rhs->m_line; - return lhs->m_template_id < rhs->m_template_id; -} - -// for sorting tests by suite/file/line -bool -suiteOrderComparator(const TestCase* lhs, const TestCase* rhs) -{ - const int res = std::strcmp(lhs->m_test_suite, rhs->m_test_suite); - if (res != 0) - return res < 0; - return fileOrderComparator(lhs, rhs); -} - -// for sorting tests by name/suite/file/line -bool -nameOrderComparator(const TestCase* lhs, const TestCase* rhs) -{ - const int res = std::strcmp(lhs->m_name, rhs->m_name); - if (res != 0) - return res < 0; - return suiteOrderComparator(lhs, rhs); -} - -DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") -void -color_to_stream(std::ostream& s, Color::Enum code) -{ - static_cast( - s); // for DOCTEST_CONFIG_COLORS_NONE or 
DOCTEST_CONFIG_COLORS_WINDOWS - static_cast(code); // for DOCTEST_CONFIG_COLORS_NONE -#ifdef DOCTEST_CONFIG_COLORS_ANSI - if (g_no_colors || (isatty(STDOUT_FILENO) == false && - getContextOptions()->force_colors == false)) - return; - - auto col = ""; - // clang-format off - switch(code) { //!OCLINT missing break in switch statement / unnecessary default statement in covered switch statement - case Color::Red: col = "[0;31m"; break; - case Color::Green: col = "[0;32m"; break; - case Color::Blue: col = "[0;34m"; break; - case Color::Cyan: col = "[0;36m"; break; - case Color::Yellow: col = "[0;33m"; break; - case Color::Grey: col = "[1;30m"; break; - case Color::LightGrey: col = "[0;37m"; break; - case Color::BrightRed: col = "[1;31m"; break; - case Color::BrightGreen: col = "[1;32m"; break; - case Color::BrightWhite: col = "[1;37m"; break; - case Color::Bright: // invalid - case Color::None: - case Color::White: - default: col = "[0m"; - } - // clang-format on - s << "\033" << col; -#endif // DOCTEST_CONFIG_COLORS_ANSI - -#ifdef DOCTEST_CONFIG_COLORS_WINDOWS - if (g_no_colors || (_isatty(_fileno(stdout)) == false && - getContextOptions()->force_colors == false)) - return; - - static struct ConsoleHelper { - HANDLE stdoutHandle; - WORD origFgAttrs; - WORD origBgAttrs; - - ConsoleHelper() - { - stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE); - CONSOLE_SCREEN_BUFFER_INFO csbiInfo; - GetConsoleScreenBufferInfo(stdoutHandle, &csbiInfo); - origFgAttrs = - csbiInfo.wAttributes & ~(BACKGROUND_GREEN | BACKGROUND_RED | - BACKGROUND_BLUE | BACKGROUND_INTENSITY); - origBgAttrs = - csbiInfo.wAttributes & ~(FOREGROUND_GREEN | FOREGROUND_RED | - FOREGROUND_BLUE | FOREGROUND_INTENSITY); - } - } ch; - -#define DOCTEST_SET_ATTR(x) \ - SetConsoleTextAttribute(ch.stdoutHandle, x | ch.origBgAttrs) - - // clang-format off - switch (code) { - case Color::White: DOCTEST_SET_ATTR(FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE); break; - case Color::Red: DOCTEST_SET_ATTR(FOREGROUND_RED); break; - case Color::Green: DOCTEST_SET_ATTR(FOREGROUND_GREEN); break; - case Color::Blue: DOCTEST_SET_ATTR(FOREGROUND_BLUE); break; - case Color::Cyan: DOCTEST_SET_ATTR(FOREGROUND_BLUE | FOREGROUND_GREEN); break; - case Color::Yellow: DOCTEST_SET_ATTR(FOREGROUND_RED | FOREGROUND_GREEN); break; - case Color::Grey: DOCTEST_SET_ATTR(0); break; - case Color::LightGrey: DOCTEST_SET_ATTR(FOREGROUND_INTENSITY); break; - case Color::BrightRed: DOCTEST_SET_ATTR(FOREGROUND_INTENSITY | FOREGROUND_RED); break; - case Color::BrightGreen: DOCTEST_SET_ATTR(FOREGROUND_INTENSITY | FOREGROUND_GREEN); break; - case Color::BrightWhite: DOCTEST_SET_ATTR(FOREGROUND_INTENSITY | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE); break; - case Color::None: - case Color::Bright: // invalid - default: DOCTEST_SET_ATTR(ch.origFgAttrs); - } - // clang-format on -#endif // DOCTEST_CONFIG_COLORS_WINDOWS -} -DOCTEST_CLANG_SUPPRESS_WARNING_POP - -std::vector& -getExceptionTranslators() -{ - static std::vector data; - return data; -} - -String -translateActiveException() -{ -#ifndef DOCTEST_CONFIG_NO_EXCEPTIONS - String res; - auto& translators = getExceptionTranslators(); - for (auto& curr : translators) - if (curr->translate(res)) - return res; - // clang-format off - DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wcatch-value") - try { - throw; - } catch(std::exception& ex) { - return ex.what(); - } catch(std::string& msg) { - return msg.c_str(); - } catch(const char* msg) { - return msg; - } catch(...) 
{ - return "unknown exception"; - } - DOCTEST_GCC_SUPPRESS_WARNING_POP -// clang-format on -#else // DOCTEST_CONFIG_NO_EXCEPTIONS - return ""; -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS -} -} // namespace - -namespace detail { -// used by the macros for registering tests -int -regTest(const TestCase& tc) -{ - getRegisteredTests().insert(tc); - return 0; -} - -// sets the current test suite -int -setTestSuite(const TestSuite& ts) -{ - doctest_detail_test_suite_ns::getCurrentTestSuite() = ts; - return 0; -} - -#ifdef DOCTEST_IS_DEBUGGER_ACTIVE -bool -isDebuggerActive() -{ - return DOCTEST_IS_DEBUGGER_ACTIVE(); -} -#else // DOCTEST_IS_DEBUGGER_ACTIVE -#ifdef DOCTEST_PLATFORM_LINUX -class ErrnoGuard { - public: - ErrnoGuard() : m_oldErrno(errno) {} - ~ErrnoGuard() { errno = m_oldErrno; } - - private: - int m_oldErrno; -}; -// See the comments in Catch2 for the reasoning behind this implementation: -// https://github.com/catchorg/Catch2/blob/v2.13.1/include/internal/catch_debugger.cpp#L79-L102 -bool -isDebuggerActive() -{ - ErrnoGuard guard; - std::ifstream in("/proc/self/status"); - for (std::string line; std::getline(in, line);) { - static const int PREFIX_LEN = 11; - if (line.compare(0, PREFIX_LEN, "TracerPid:\t") == 0) { - return line.length() > PREFIX_LEN && line[PREFIX_LEN] != '0'; - } - } - return false; -} -#elif defined(DOCTEST_PLATFORM_MAC) -// The following function is taken directly from the following technical note: -// https://developer.apple.com/library/archive/qa/qa1361/_index.html -// Returns true if the current process is being debugged (either -// running under the debugger or has a debugger attached post facto). -bool -isDebuggerActive() -{ - int mib[4]; - kinfo_proc info; - size_t size; - // Initialize the flags so that, if sysctl fails for some bizarre - // reason, we get a predictable result. - info.kp_proc.p_flag = 0; - // Initialize mib, which tells sysctl the info we want, in this case - // we're looking for information about a specific process ID. - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PID; - mib[3] = getpid(); - // Call sysctl. - size = sizeof(info); - if (sysctl(mib, DOCTEST_COUNTOF(mib), &info, &size, 0, 0) != 0) { - std::cerr << "\nCall to sysctl failed - unable to determine if debugger is " - "active **\n"; - return false; - } - // We're being debugged if the P_TRACED flag is set. 
- return ((info.kp_proc.p_flag & P_TRACED) != 0); -} -#elif DOCTEST_MSVC || defined(__MINGW32__) || defined(__MINGW64__) -bool -isDebuggerActive() -{ - return ::IsDebuggerPresent() != 0; -} -#else -bool -isDebuggerActive() -{ - return false; -} -#endif // Platform -#endif // DOCTEST_IS_DEBUGGER_ACTIVE - -void -registerExceptionTranslatorImpl(const IExceptionTranslator* et) -{ - if (std::find( - getExceptionTranslators().begin(), getExceptionTranslators().end(), - et) == getExceptionTranslators().end()) - getExceptionTranslators().push_back(et); -} - -#ifdef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -void -toStream(std::ostream* s, char* in) -{ - *s << in; -} -void -toStream(std::ostream* s, const char* in) -{ - *s << in; -} -#endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING -void -toStream(std::ostream* s, bool in) -{ - *s << std::boolalpha << in << std::noboolalpha; -} -void -toStream(std::ostream* s, float in) -{ - *s << in; -} -void -toStream(std::ostream* s, double in) -{ - *s << in; -} -void -toStream(std::ostream* s, double long in) -{ - *s << in; -} - -void -toStream(std::ostream* s, char in) -{ - *s << in; -} -void -toStream(std::ostream* s, char signed in) -{ - *s << in; -} -void -toStream(std::ostream* s, char unsigned in) -{ - *s << in; -} -void -toStream(std::ostream* s, int short in) -{ - *s << in; -} -void -toStream(std::ostream* s, int short unsigned in) -{ - *s << in; -} -void -toStream(std::ostream* s, int in) -{ - *s << in; -} -void -toStream(std::ostream* s, int unsigned in) -{ - *s << in; -} -void -toStream(std::ostream* s, int long in) -{ - *s << in; -} -void -toStream(std::ostream* s, int long unsigned in) -{ - *s << in; -} -void -toStream(std::ostream* s, int long long in) -{ - *s << in; -} -void -toStream(std::ostream* s, int long long unsigned in) -{ - *s << in; -} - -DOCTEST_THREAD_LOCAL std::vector - g_infoContexts; // for logging with INFO() - -ContextScopeBase::ContextScopeBase() -{ - g_infoContexts.push_back(this); -} - -ContextScopeBase::ContextScopeBase(ContextScopeBase&& other) -{ - if (other.need_to_destroy) { - other.destroy(); - } - other.need_to_destroy = false; - g_infoContexts.push_back(this); -} - -DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH( - 4996) // std::uncaught_exception is deprecated in C++17 -DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") -DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") - -// destroy cannot be inlined into the destructor because that would mean calling -// stringify after ContextScope has been destroyed (base class destructors run -// after derived class destructors). Instead, ContextScope calls this method -// directly from its destructor. 
-void -ContextScopeBase::destroy() -{ -#if defined(__cpp_lib_uncaught_exceptions) && \ - __cpp_lib_uncaught_exceptions >= 201411L && \ - (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || \ - __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200) - if (std::uncaught_exceptions() > 0) { -#else - if (std::uncaught_exception()) { -#endif - std::ostringstream s; - this->stringify(&s); - g_cs->stringifiedContexts.push_back(s.str().c_str()); - } - g_infoContexts.pop_back(); -} // namespace detail - -DOCTEST_CLANG_SUPPRESS_WARNING_POP -DOCTEST_GCC_SUPPRESS_WARNING_POP -DOCTEST_MSVC_SUPPRESS_WARNING_POP -} // namespace detail -namespace { -using namespace detail; - -#if !defined(DOCTEST_CONFIG_POSIX_SIGNALS) && \ - !defined(DOCTEST_CONFIG_WINDOWS_SEH) -struct FatalConditionHandler { - static void reset() {} - static void allocateAltStackMem() {} - static void freeAltStackMem() {} -}; -#else // DOCTEST_CONFIG_POSIX_SIGNALS || DOCTEST_CONFIG_WINDOWS_SEH - -void reportFatal(const std::string&); - -#ifdef DOCTEST_PLATFORM_WINDOWS - -struct SignalDefs { - DWORD id; - const char* name; -}; -// There is no 1-1 mapping between signals and windows exceptions. -// Windows can easily distinguish between SO and SigSegV, -// but SigInt, SigTerm, etc are handled differently. -SignalDefs signalDefs[] = { - {static_cast(EXCEPTION_ILLEGAL_INSTRUCTION), - "SIGILL - Illegal instruction signal"}, - {static_cast(EXCEPTION_STACK_OVERFLOW), "SIGSEGV - Stack overflow"}, - {static_cast(EXCEPTION_ACCESS_VIOLATION), - "SIGSEGV - Segmentation violation signal"}, - {static_cast(EXCEPTION_INT_DIVIDE_BY_ZERO), "Divide by zero error"}, -}; - -struct FatalConditionHandler { - static LONG CALLBACK handleException(PEXCEPTION_POINTERS ExceptionInfo) - { - // Multiple threads may enter this filter/handler at once. We want the error - // message to be printed on the console just once no matter how many threads - // have crashed. 
- static std::mutex mutex; - static bool execute = true; - { - std::lock_guard lock(mutex); - if (execute) { - bool reported = false; - for (size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) { - if (ExceptionInfo->ExceptionRecord->ExceptionCode == - signalDefs[i].id) { - reportFatal(signalDefs[i].name); - reported = true; - break; - } - } - if (reported == false) - reportFatal("Unhandled SEH exception caught"); - if (isDebuggerActive() && !g_cs->no_breaks) - DOCTEST_BREAK_INTO_DEBUGGER(); - } - execute = false; - } - std::exit(EXIT_FAILURE); - } - - static void allocateAltStackMem() {} - static void freeAltStackMem() {} - - FatalConditionHandler() - { - isSet = true; - // 32k seems enough for doctest to handle stack overflow, - // but the value was found experimentally, so there is no strong guarantee - guaranteeSize = 32 * 1024; - // Register an unhandled exception filter - previousTop = SetUnhandledExceptionFilter(handleException); - // Pass in guarantee size to be filled - SetThreadStackGuarantee(&guaranteeSize); - - // On Windows uncaught exceptions from another thread, exceptions from - // destructors, or calls to std::terminate are not a SEH exception - - // The terminal handler gets called when: - // - std::terminate is called FROM THE TEST RUNNER THREAD - // - an exception is thrown from a destructor FROM THE TEST RUNNER THREAD - original_terminate_handler = std::get_terminate(); - std::set_terminate([]() DOCTEST_NOEXCEPT { - reportFatal("Terminate handler called"); - if (isDebuggerActive() && !g_cs->no_breaks) - DOCTEST_BREAK_INTO_DEBUGGER(); - std::exit(EXIT_FAILURE); // explicitly exit - otherwise the SIGABRT - // handler may be called as well - }); - - // SIGABRT is raised when: - // - std::terminate is called FROM A DIFFERENT THREAD - // - an exception is thrown from a destructor FROM A DIFFERENT THREAD - // - an uncaught exception is thrown FROM A DIFFERENT THREAD - prev_sigabrt_handler = - std::signal(SIGABRT, [](int signal) DOCTEST_NOEXCEPT { - if (signal == SIGABRT) { - reportFatal("SIGABRT - Abort (abnormal termination) signal"); - if (isDebuggerActive() && !g_cs->no_breaks) - DOCTEST_BREAK_INTO_DEBUGGER(); - std::exit(EXIT_FAILURE); - } - }); - - // The following settings are taken from google test, and more - // specifically from UnitTest::Run() inside of gtest.cc - - // the user does not want to see pop-up dialogs about crashes - prev_error_mode_1 = SetErrorMode( - SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT | - SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); - // This forces the abort message to go to stderr in all circumstances. - prev_error_mode_2 = _set_error_mode(_OUT_TO_STDERR); - // In the debug version, Visual Studio pops up a separate dialog - // offering a choice to debug the aborted program - we want to disable that. - prev_abort_behavior = - _set_abort_behavior(0x0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); - // In debug mode, the Windows CRT can crash with an assertion over invalid - // input (e.g. passing an invalid file descriptor). The default handling - // for these assertions is to pop up a dialog and wait for user input. - // Instead ask the CRT to dump such assertions to stderr non-interactively. 
- prev_report_mode = - _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG); - prev_report_file = _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR); - } - - static void reset() - { - if (isSet) { - // Unregister handler and restore the old guarantee - SetUnhandledExceptionFilter(previousTop); - SetThreadStackGuarantee(&guaranteeSize); - std::set_terminate(original_terminate_handler); - std::signal(SIGABRT, prev_sigabrt_handler); - SetErrorMode(prev_error_mode_1); - _set_error_mode(prev_error_mode_2); - _set_abort_behavior( - prev_abort_behavior, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); - static_cast(_CrtSetReportMode(_CRT_ASSERT, prev_report_mode)); - static_cast(_CrtSetReportFile(_CRT_ASSERT, prev_report_file)); - isSet = false; - } - } - - ~FatalConditionHandler() { reset(); } - - private: - static UINT prev_error_mode_1; - static int prev_error_mode_2; - static unsigned int prev_abort_behavior; - static int prev_report_mode; - static _HFILE prev_report_file; - static void(DOCTEST_CDECL* prev_sigabrt_handler)(int); - static std::terminate_handler original_terminate_handler; - static bool isSet; - static ULONG guaranteeSize; - static LPTOP_LEVEL_EXCEPTION_FILTER previousTop; -}; - -UINT FatalConditionHandler::prev_error_mode_1; -int FatalConditionHandler::prev_error_mode_2; -unsigned int FatalConditionHandler::prev_abort_behavior; -int FatalConditionHandler::prev_report_mode; -_HFILE FatalConditionHandler::prev_report_file; -void(DOCTEST_CDECL* FatalConditionHandler::prev_sigabrt_handler)(int); -std::terminate_handler FatalConditionHandler::original_terminate_handler; -bool FatalConditionHandler::isSet = false; -ULONG FatalConditionHandler::guaranteeSize = 0; -LPTOP_LEVEL_EXCEPTION_FILTER FatalConditionHandler::previousTop = nullptr; - -#else // DOCTEST_PLATFORM_WINDOWS - -struct SignalDefs { - int id; - const char* name; -}; -SignalDefs signalDefs[] = { - {SIGINT, "SIGINT - Terminal interrupt signal"}, - {SIGILL, "SIGILL - Illegal instruction signal"}, - {SIGFPE, "SIGFPE - Floating point error signal"}, - {SIGSEGV, "SIGSEGV - Segmentation violation signal"}, - {SIGTERM, "SIGTERM - Termination request signal"}, - {SIGABRT, "SIGABRT - Abort (abnormal termination) signal"}}; - -struct FatalConditionHandler { - static bool isSet; - static struct sigaction oldSigActions[DOCTEST_COUNTOF(signalDefs)]; - static stack_t oldSigStack; - static size_t altStackSize; - static char* altStackMem; - - static void handleSignal(int sig) - { - const char* name = ""; - for (std::size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) { - SignalDefs& def = signalDefs[i]; - if (sig == def.id) { - name = def.name; - break; - } - } - reset(); - reportFatal(name); - raise(sig); - } - - static void allocateAltStackMem() { altStackMem = new char[altStackSize]; } - - static void freeAltStackMem() { delete[] altStackMem; } - - FatalConditionHandler() - { - isSet = true; - stack_t sigStack; - sigStack.ss_sp = altStackMem; - sigStack.ss_size = altStackSize; - sigStack.ss_flags = 0; - sigaltstack(&sigStack, &oldSigStack); - struct sigaction sa = {}; - sa.sa_handler = handleSignal; // NOLINT - sa.sa_flags = SA_ONSTACK; - for (std::size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) { - sigaction(signalDefs[i].id, &sa, &oldSigActions[i]); - } - } - - ~FatalConditionHandler() { reset(); } - static void reset() - { - if (isSet) { - // Set signals back to previous values -- hopefully nobody overwrote them - // in the meantime - for (std::size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) { - 
sigaction(signalDefs[i].id, &oldSigActions[i], nullptr); - } - // Return the old stack - sigaltstack(&oldSigStack, nullptr); - isSet = false; - } - } -}; - -bool FatalConditionHandler::isSet = false; -struct sigaction - FatalConditionHandler::oldSigActions[DOCTEST_COUNTOF(signalDefs)] = {}; -stack_t FatalConditionHandler::oldSigStack = {}; -size_t FatalConditionHandler::altStackSize = 4 * SIGSTKSZ; -char* FatalConditionHandler::altStackMem = nullptr; - -#endif // DOCTEST_PLATFORM_WINDOWS -#endif // DOCTEST_CONFIG_POSIX_SIGNALS || DOCTEST_CONFIG_WINDOWS_SEH - -} // namespace - -namespace { -using namespace detail; - -#ifdef DOCTEST_PLATFORM_WINDOWS -#define DOCTEST_OUTPUT_DEBUG_STRING(text) ::OutputDebugStringA(text) -#else -// TODO: integration with XCode and other IDEs -#define DOCTEST_OUTPUT_DEBUG_STRING( \ - text) // NOLINT(clang-diagnostic-unused-macros) -#endif // Platform - -void -addAssert(assertType::Enum at) -{ - if ((at & assertType::is_warn) == - 0) //! OCLINT bitwise operator in conditional - g_cs->numAssertsCurrentTest_atomic++; -} - -void -addFailedAssert(assertType::Enum at) -{ - if ((at & assertType::is_warn) == - 0) //! OCLINT bitwise operator in conditional - g_cs->numAssertsFailedCurrentTest_atomic++; -} - -#if defined(DOCTEST_CONFIG_POSIX_SIGNALS) || defined(DOCTEST_CONFIG_WINDOWS_SEH) -void -reportFatal(const std::string& message) -{ - g_cs->failure_flags |= TestCaseFailureReason::Crash; - - DOCTEST_ITERATE_THROUGH_REPORTERS( - test_case_exception, {message.c_str(), true}); - - while (g_cs->subcasesStack.size()) { - g_cs->subcasesStack.pop_back(); - DOCTEST_ITERATE_THROUGH_REPORTERS(subcase_end, DOCTEST_EMPTY); - } - - g_cs->finalizeTestCaseData(); - - DOCTEST_ITERATE_THROUGH_REPORTERS(test_case_end, *g_cs); - - DOCTEST_ITERATE_THROUGH_REPORTERS(test_run_end, *g_cs); -} -#endif // DOCTEST_CONFIG_POSIX_SIGNALS || DOCTEST_CONFIG_WINDOWS_SEH -} // namespace -namespace detail { - -ResultBuilder::ResultBuilder( - assertType::Enum at, const char* file, int line, const char* expr, - const char* exception_type, const char* exception_string) -{ - m_test_case = g_cs->currentTest; - m_at = at; - m_file = file; - m_line = line; - m_expr = expr; - m_failed = true; - m_threw = false; - m_threw_as = false; - m_exception_type = exception_type; - m_exception_string = exception_string; -#if DOCTEST_MSVC - if (m_expr[0] == - ' ') // this happens when variadic macros are disabled under MSVC - ++m_expr; -#endif // MSVC -} - -void -ResultBuilder::setResult(const Result& res) -{ - m_decomp = res.m_decomp; - m_failed = !res.m_passed; -} - -void -ResultBuilder::translateException() -{ - m_threw = true; - m_exception = translateActiveException(); -} - -bool -ResultBuilder::log() -{ - if (m_at & assertType::is_throws) { //! OCLINT bitwise operator in - //! conditional - m_failed = !m_threw; - } else if ( - (m_at & assertType::is_throws_as) && - (m_at & assertType::is_throws_with)) { //! OCLINT - m_failed = !m_threw_as || (m_exception != m_exception_string); - } else if (m_at & assertType::is_throws_as) { //! OCLINT bitwise operator in - //! conditional - m_failed = !m_threw_as; - } else if (m_at & assertType::is_throws_with) { //! OCLINT bitwise operator - //! in conditional - m_failed = m_exception != m_exception_string; - } else if (m_at & assertType::is_nothrow) { //! OCLINT bitwise operator in - //! 
conditional - m_failed = m_threw; - } - - if (m_exception.size()) - m_exception = "\"" + m_exception + "\""; - - if (is_running_in_test) { - addAssert(m_at); - DOCTEST_ITERATE_THROUGH_REPORTERS(log_assert, *this); - - if (m_failed) - addFailedAssert(m_at); - } else if (m_failed) { - failed_out_of_a_testing_context(*this); - } - - return m_failed && isDebuggerActive() && !getContextOptions()->no_breaks && - (g_cs->currentTest == nullptr || - !g_cs->currentTest->m_no_breaks); // break into debugger -} - -void -ResultBuilder::react() const -{ - if (m_failed && checkIfShouldThrow(m_at)) - throwException(); -} - -void -failed_out_of_a_testing_context(const AssertData& ad) -{ - if (g_cs->ah) - g_cs->ah(ad); - else - std::abort(); -} - -bool -decomp_assert( - assertType::Enum at, const char* file, int line, const char* expr, - Result result) -{ - bool failed = !result.m_passed; - - // ################################################################################### - // IF THE DEBUGGER BREAKS HERE - GO 1 LEVEL UP IN THE CALLSTACK FOR THE - // FAILING ASSERT THIS IS THE EFFECT OF HAVING - // 'DOCTEST_CONFIG_SUPER_FAST_ASSERTS' DEFINED - // ################################################################################### - DOCTEST_ASSERT_OUT_OF_TESTS(result.m_decomp); - DOCTEST_ASSERT_IN_TESTS(result.m_decomp); - // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) - return !failed; -} - -MessageBuilder::MessageBuilder( - const char* file, int line, assertType::Enum severity) -{ - m_stream = tlssPush(); - m_file = file; - m_line = line; - m_severity = severity; -} - -MessageBuilder::~MessageBuilder() -{ - if (!logged) - tlssPop(); -} - -IExceptionTranslator::IExceptionTranslator() = default; -IExceptionTranslator::~IExceptionTranslator() = default; - -bool -MessageBuilder::log() -{ - if (!logged) { - m_string = tlssPop(); - logged = true; - } - - DOCTEST_ITERATE_THROUGH_REPORTERS(log_message, *this); - - const bool isWarn = m_severity & assertType::is_warn; - - // warn is just a message in this context so we don't treat it as an assert - if (!isWarn) { - addAssert(m_severity); - addFailedAssert(m_severity); - } - - return isDebuggerActive() && !getContextOptions()->no_breaks && !isWarn && - (g_cs->currentTest == nullptr || - !g_cs->currentTest->m_no_breaks); // break into debugger -} - -void -MessageBuilder::react() -{ - if (m_severity & - assertType::is_require) //! OCLINT bitwise operator in conditional - throwException(); -} -} // namespace detail -namespace { -using namespace detail; - -// clang-format off - -// ================================================================================================= -// The following code has been taken verbatim from Catch2/include/internal/catch_xmlwriter.h/cpp -// This is done so cherry-picking bug fixes is trivial - even the style/formatting is untouched. 
-// ================================================================================================= - - class XmlEncode { - public: - enum ForWhat { ForTextNodes, ForAttributes }; - - XmlEncode( std::string const& str, ForWhat forWhat = ForTextNodes ); - - void encodeTo( std::ostream& os ) const; - - friend std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ); - - private: - std::string m_str; - ForWhat m_forWhat; - }; - - class XmlWriter { - public: - - class ScopedElement { - public: - ScopedElement( XmlWriter* writer ); - - ScopedElement( ScopedElement&& other ) DOCTEST_NOEXCEPT; - ScopedElement& operator=( ScopedElement&& other ) DOCTEST_NOEXCEPT; - - ~ScopedElement(); - - ScopedElement& writeText( std::string const& text, bool indent = true ); - - template - ScopedElement& writeAttribute( std::string const& name, T const& attribute ) { - m_writer->writeAttribute( name, attribute ); - return *this; - } - - private: - mutable XmlWriter* m_writer = nullptr; - }; - - XmlWriter( std::ostream& os = std::cout ); - ~XmlWriter(); - - XmlWriter( XmlWriter const& ) = delete; - XmlWriter& operator=( XmlWriter const& ) = delete; - - XmlWriter& startElement( std::string const& name ); - - ScopedElement scopedElement( std::string const& name ); - - XmlWriter& endElement(); - - XmlWriter& writeAttribute( std::string const& name, std::string const& attribute ); - - XmlWriter& writeAttribute( std::string const& name, const char* attribute ); - - XmlWriter& writeAttribute( std::string const& name, bool attribute ); - - template - XmlWriter& writeAttribute( std::string const& name, T const& attribute ) { - std::stringstream rss; - rss << attribute; - return writeAttribute( name, rss.str() ); - } - - XmlWriter& writeText( std::string const& text, bool indent = true ); - - //XmlWriter& writeComment( std::string const& text ); - - //void writeStylesheetRef( std::string const& url ); - - //XmlWriter& writeBlankLine(); - - void ensureTagClosed(); - - private: - - void writeDeclaration(); - - void newlineIfNecessary(); - - bool m_tagIsOpen = false; - bool m_needsNewline = false; - std::vector m_tags; - std::string m_indent; - std::ostream& m_os; - }; - -// ================================================================================================= -// The following code has been taken verbatim from Catch2/include/internal/catch_xmlwriter.h/cpp -// This is done so cherry-picking bug fixes is trivial - even the style/formatting is untouched. 
-// ================================================================================================= - -using uchar = unsigned char; - -namespace { - - size_t trailingBytes(unsigned char c) { - if ((c & 0xE0) == 0xC0) { - return 2; - } - if ((c & 0xF0) == 0xE0) { - return 3; - } - if ((c & 0xF8) == 0xF0) { - return 4; - } - DOCTEST_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); - } - - uint32_t headerValue(unsigned char c) { - if ((c & 0xE0) == 0xC0) { - return c & 0x1F; - } - if ((c & 0xF0) == 0xE0) { - return c & 0x0F; - } - if ((c & 0xF8) == 0xF0) { - return c & 0x07; - } - DOCTEST_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); - } - - void hexEscapeChar(std::ostream& os, unsigned char c) { - std::ios_base::fmtflags f(os.flags()); - os << "\\x" - << std::uppercase << std::hex << std::setfill('0') << std::setw(2) - << static_cast(c); - os.flags(f); - } - -} // anonymous namespace - - XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat ) - : m_str( str ), - m_forWhat( forWhat ) - {} - - void XmlEncode::encodeTo( std::ostream& os ) const { - // Apostrophe escaping not necessary if we always use " to write attributes - // (see: https://www.w3.org/TR/xml/#syntax) - - for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) { - uchar c = m_str[idx]; - switch (c) { - case '<': os << "<"; break; - case '&': os << "&"; break; - - case '>': - // See: https://www.w3.org/TR/xml/#syntax - if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']') - os << ">"; - else - os << c; - break; - - case '\"': - if (m_forWhat == ForAttributes) - os << """; - else - os << c; - break; - - default: - // Check for control characters and invalid utf-8 - - // Escape control characters in standard ascii - // see https://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 - if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) { - hexEscapeChar(os, c); - break; - } - - // Plain ASCII: Write it to stream - if (c < 0x7F) { - os << c; - break; - } - - // UTF-8 territory - // Check if the encoding is valid and if it is not, hex escape bytes. - // Important: We do not check the exact decoded values for validity, only the encoding format - // First check that this bytes is a valid lead byte: - // This means that it is not encoded as 1111 1XXX - // Or as 10XX XXXX - if (c < 0xC0 || - c >= 0xF8) { - hexEscapeChar(os, c); - break; - } - - auto encBytes = trailingBytes(c); - // Are there enough bytes left to avoid accessing out-of-bounds memory? 
- if (idx + encBytes - 1 >= m_str.size()) { - hexEscapeChar(os, c); - break; - } - // The header is valid, check data - // The next encBytes bytes must together be a valid utf-8 - // This means: bitpattern 10XX XXXX and the extracted value is sane (ish) - bool valid = true; - uint32_t value = headerValue(c); - for (std::size_t n = 1; n < encBytes; ++n) { - uchar nc = m_str[idx + n]; - valid &= ((nc & 0xC0) == 0x80); - value = (value << 6) | (nc & 0x3F); - } - - if ( - // Wrong bit pattern of following bytes - (!valid) || - // Overlong encodings - (value < 0x80) || - ( value < 0x800 && encBytes > 2) || // removed "0x80 <= value &&" because redundant - (0x800 < value && value < 0x10000 && encBytes > 3) || - // Encoded value out of range - (value >= 0x110000) - ) { - hexEscapeChar(os, c); - break; - } - - // If we got here, this is in fact a valid(ish) utf-8 sequence - for (std::size_t n = 0; n < encBytes; ++n) { - os << m_str[idx + n]; - } - idx += encBytes - 1; - break; - } - } - } - - std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) { - xmlEncode.encodeTo( os ); - return os; - } - - XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer ) - : m_writer( writer ) - {} - - XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) DOCTEST_NOEXCEPT - : m_writer( other.m_writer ){ - other.m_writer = nullptr; - } - XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) DOCTEST_NOEXCEPT { - if ( m_writer ) { - m_writer->endElement(); - } - m_writer = other.m_writer; - other.m_writer = nullptr; - return *this; - } - - - XmlWriter::ScopedElement::~ScopedElement() { - if( m_writer ) - m_writer->endElement(); - } - - XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, bool indent ) { - m_writer->writeText( text, indent ); - return *this; - } - - XmlWriter::XmlWriter( std::ostream& os ) : m_os( os ) - { - writeDeclaration(); - } - - XmlWriter::~XmlWriter() { - while( !m_tags.empty() ) - endElement(); - } - - XmlWriter& XmlWriter::startElement( std::string const& name ) { - ensureTagClosed(); - newlineIfNecessary(); - m_os << m_indent << '<' << name; - m_tags.push_back( name ); - m_indent += " "; - m_tagIsOpen = true; - return *this; - } - - XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name ) { - ScopedElement scoped( this ); - startElement( name ); - return scoped; - } - - XmlWriter& XmlWriter::endElement() { - newlineIfNecessary(); - m_indent = m_indent.substr( 0, m_indent.size()-2 ); - if( m_tagIsOpen ) { - m_os << "/>"; - m_tagIsOpen = false; - } - else { - m_os << m_indent << ""; - } - m_os << std::endl; - m_tags.pop_back(); - return *this; - } - - XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) { - if( !name.empty() && !attribute.empty() ) - m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"'; - return *this; - } - - XmlWriter& XmlWriter::writeAttribute( std::string const& name, const char* attribute ) { - if( !name.empty() && attribute && attribute[0] != '\0' ) - m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"'; - return *this; - } - - XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) { - m_os << ' ' << name << "=\"" << ( attribute ? 
"true" : "false" ) << '"'; - return *this; - } - - XmlWriter& XmlWriter::writeText( std::string const& text, bool indent ) { - if( !text.empty() ){ - bool tagWasOpen = m_tagIsOpen; - ensureTagClosed(); - if( tagWasOpen && indent ) - m_os << m_indent; - m_os << XmlEncode( text ); - m_needsNewline = true; - } - return *this; - } - - //XmlWriter& XmlWriter::writeComment( std::string const& text ) { - // ensureTagClosed(); - // m_os << m_indent << ""; - // m_needsNewline = true; - // return *this; - //} - - //void XmlWriter::writeStylesheetRef( std::string const& url ) { - // m_os << "\n"; - //} - - //XmlWriter& XmlWriter::writeBlankLine() { - // ensureTagClosed(); - // m_os << '\n'; - // return *this; - //} - - void XmlWriter::ensureTagClosed() { - if( m_tagIsOpen ) { - m_os << ">" << std::endl; - m_tagIsOpen = false; - } - } - - void XmlWriter::writeDeclaration() { - m_os << "\n"; - } - - void XmlWriter::newlineIfNecessary() { - if( m_needsNewline ) { - m_os << std::endl; - m_needsNewline = false; - } - } - -// ================================================================================================= -// End of copy-pasted code from Catch -// ================================================================================================= - -// clang-format on - -struct XmlReporter : public IReporter { - XmlWriter xml; - std::mutex mutex; - - // caching pointers/references to objects of these types - safe to do - const ContextOptions& opt; - const TestCaseData* tc = nullptr; - - XmlReporter(const ContextOptions& co) : xml(*co.cout), opt(co) {} - - void log_contexts() - { - int num_contexts = get_num_active_contexts(); - if (num_contexts) { - auto contexts = get_active_contexts(); - std::stringstream ss; - for (int i = 0; i < num_contexts; ++i) { - contexts[i]->stringify(&ss); - xml.scopedElement("Info").writeText(ss.str()); - ss.str(""); - } - } - } - - unsigned line(unsigned l) const { return opt.no_line_numbers ? 
0 : l; } - - void test_case_start_impl(const TestCaseData& in) - { - bool open_ts_tag = false; - if (tc != nullptr) { // we have already opened a test suite - if (std::strcmp(tc->m_test_suite, in.m_test_suite) != 0) { - xml.endElement(); - open_ts_tag = true; - } - } else { - open_ts_tag = true; // first test case ==> first test suite - } - - if (open_ts_tag) { - xml.startElement("TestSuite"); - xml.writeAttribute("name", in.m_test_suite); - } - - tc = ∈ - xml.startElement("TestCase") - .writeAttribute("name", in.m_name) - .writeAttribute("filename", skipPathFromFilename(in.m_file.c_str())) - .writeAttribute("line", line(in.m_line)) - .writeAttribute("description", in.m_description); - - if (Approx(in.m_timeout) != 0) - xml.writeAttribute("timeout", in.m_timeout); - if (in.m_may_fail) - xml.writeAttribute("may_fail", true); - if (in.m_should_fail) - xml.writeAttribute("should_fail", true); - } - - // ========================================================================================= - // WHAT FOLLOWS ARE OVERRIDES OF THE VIRTUAL METHODS OF THE REPORTER INTERFACE - // ========================================================================================= - - void report_query(const QueryData& in) override - { - test_run_start(); - if (opt.list_reporters) { - for (auto& curr : getListeners()) - xml.scopedElement("Listener") - .writeAttribute("priority", curr.first.first) - .writeAttribute("name", curr.first.second); - for (auto& curr : getReporters()) - xml.scopedElement("Reporter") - .writeAttribute("priority", curr.first.first) - .writeAttribute("name", curr.first.second); - } else if (opt.count || opt.list_test_cases) { - for (unsigned i = 0; i < in.num_data; ++i) { - xml.scopedElement("TestCase") - .writeAttribute("name", in.data[i]->m_name) - .writeAttribute("testsuite", in.data[i]->m_test_suite) - .writeAttribute( - "filename", skipPathFromFilename(in.data[i]->m_file.c_str())) - .writeAttribute("line", line(in.data[i]->m_line)) - .writeAttribute("skipped", in.data[i]->m_skip); - } - xml.scopedElement("OverallResultsTestCases") - .writeAttribute( - "unskipped", in.run_stats->numTestCasesPassingFilters); - } else if (opt.list_test_suites) { - for (unsigned i = 0; i < in.num_data; ++i) - xml.scopedElement("TestSuite") - .writeAttribute("name", in.data[i]->m_test_suite); - xml.scopedElement("OverallResultsTestCases") - .writeAttribute( - "unskipped", in.run_stats->numTestCasesPassingFilters); - xml.scopedElement("OverallResultsTestSuites") - .writeAttribute( - "unskipped", in.run_stats->numTestSuitesPassingFilters); - } - xml.endElement(); - } - - void test_run_start() override - { - // remove .exe extension - mainly to have the same output on UNIX and - // Windows - std::string binary_name = skipPathFromFilename(opt.binary_name.c_str()); -#ifdef DOCTEST_PLATFORM_WINDOWS - if (binary_name.rfind(".exe") != std::string::npos) - binary_name = binary_name.substr(0, binary_name.length() - 4); -#endif // DOCTEST_PLATFORM_WINDOWS - - xml.startElement("doctest").writeAttribute("binary", binary_name); - if (opt.no_version == false) - xml.writeAttribute("version", DOCTEST_VERSION_STR); - - // only the consequential ones (TODO: filters) - xml.scopedElement("Options") - .writeAttribute("order_by", opt.order_by.c_str()) - .writeAttribute("rand_seed", opt.rand_seed) - .writeAttribute("first", opt.first) - .writeAttribute("last", opt.last) - .writeAttribute("abort_after", opt.abort_after) - .writeAttribute("subcase_filter_levels", opt.subcase_filter_levels) - .writeAttribute("case_sensitive", 
opt.case_sensitive) - .writeAttribute("no_throw", opt.no_throw) - .writeAttribute("no_skip", opt.no_skip); - } - - void test_run_end(const TestRunStats& p) override - { - if (tc) // the TestSuite tag - only if there has been at least 1 test case - xml.endElement(); - - xml.scopedElement("OverallResultsAsserts") - .writeAttribute("successes", p.numAsserts - p.numAssertsFailed) - .writeAttribute("failures", p.numAssertsFailed); - - xml.startElement("OverallResultsTestCases") - .writeAttribute( - "successes", p.numTestCasesPassingFilters - p.numTestCasesFailed) - .writeAttribute("failures", p.numTestCasesFailed); - if (opt.no_skipped_summary == false) - xml.writeAttribute( - "skipped", p.numTestCases - p.numTestCasesPassingFilters); - xml.endElement(); - - xml.endElement(); - } - - void test_case_start(const TestCaseData& in) override - { - test_case_start_impl(in); - xml.ensureTagClosed(); - } - - void test_case_reenter(const TestCaseData&) override {} - - void test_case_end(const CurrentTestCaseStats& st) override - { - xml.startElement("OverallResultsAsserts") - .writeAttribute( - "successes", - st.numAssertsCurrentTest - st.numAssertsFailedCurrentTest) - .writeAttribute("failures", st.numAssertsFailedCurrentTest) - .writeAttribute("test_case_success", st.testCaseSuccess); - if (opt.duration) - xml.writeAttribute("duration", st.seconds); - if (tc->m_expected_failures) - xml.writeAttribute("expected_failures", tc->m_expected_failures); - xml.endElement(); - - xml.endElement(); - } - - void test_case_exception(const TestCaseException& e) override - { - std::lock_guard lock(mutex); - - xml.scopedElement("Exception") - .writeAttribute("crash", e.is_crash) - .writeText(e.error_string.c_str()); - } - - void subcase_start(const SubcaseSignature& in) override - { - xml.startElement("SubCase") - .writeAttribute("name", in.m_name) - .writeAttribute("filename", skipPathFromFilename(in.m_file)) - .writeAttribute("line", line(in.m_line)); - xml.ensureTagClosed(); - } - - void subcase_end() override { xml.endElement(); } - - void log_assert(const AssertData& rb) override - { - if (!rb.m_failed && !opt.success) - return; - - std::lock_guard lock(mutex); - - xml.startElement("Expression") - .writeAttribute("success", !rb.m_failed) - .writeAttribute("type", assertString(rb.m_at)) - .writeAttribute("filename", skipPathFromFilename(rb.m_file)) - .writeAttribute("line", line(rb.m_line)); - - xml.scopedElement("Original").writeText(rb.m_expr); - - if (rb.m_threw) - xml.scopedElement("Exception").writeText(rb.m_exception.c_str()); - - if (rb.m_at & assertType::is_throws_as) - xml.scopedElement("ExpectedException").writeText(rb.m_exception_type); - if (rb.m_at & assertType::is_throws_with) - xml.scopedElement("ExpectedExceptionString") - .writeText(rb.m_exception_string); - if ((rb.m_at & assertType::is_normal) && !rb.m_threw) - xml.scopedElement("Expanded").writeText(rb.m_decomp.c_str()); - - log_contexts(); - - xml.endElement(); - } - - void log_message(const MessageData& mb) override - { - std::lock_guard lock(mutex); - - xml.startElement("Message") - .writeAttribute("type", failureString(mb.m_severity)) - .writeAttribute("filename", skipPathFromFilename(mb.m_file)) - .writeAttribute("line", line(mb.m_line)); - - xml.scopedElement("Text").writeText(mb.m_string.c_str()); - - log_contexts(); - - xml.endElement(); - } - - void test_case_skipped(const TestCaseData& in) override - { - if (opt.no_skipped_summary == false) { - test_case_start_impl(in); - xml.writeAttribute("skipped", "true"); - xml.endElement(); - } 
- } -}; - -DOCTEST_REGISTER_REPORTER("xml", 0, XmlReporter); - -void -fulltext_log_assert_to_stream(std::ostream& s, const AssertData& rb) -{ - if ((rb.m_at & (assertType::is_throws_as | assertType::is_throws_with)) == - 0) //! OCLINT bitwise operator in conditional - s << Color::Cyan << assertString(rb.m_at) << "( " << rb.m_expr << " ) " - << Color::None; - - if (rb.m_at & - assertType::is_throws) { //! OCLINT bitwise operator in conditional - s << (rb.m_threw ? "threw as expected!" : "did NOT throw at all!") << "\n"; - } else if ( - (rb.m_at & assertType::is_throws_as) && - (rb.m_at & assertType::is_throws_with)) { //! OCLINT - s << Color::Cyan << assertString(rb.m_at) << "( " << rb.m_expr << ", \"" - << rb.m_exception_string << "\", " << rb.m_exception_type << " ) " - << Color::None; - if (rb.m_threw) { - if (!rb.m_failed) { - s << "threw as expected!\n"; - } else { - s << "threw a DIFFERENT exception! (contents: " << rb.m_exception - << ")\n"; - } - } else { - s << "did NOT throw at all!\n"; - } - } else if (rb.m_at & assertType::is_throws_as) { //! OCLINT bitwise operator - //! in conditional - s << Color::Cyan << assertString(rb.m_at) << "( " << rb.m_expr << ", " - << rb.m_exception_type << " ) " << Color::None - << (rb.m_threw ? (rb.m_threw_as ? "threw as expected!" - : "threw a DIFFERENT exception: ") - : "did NOT throw at all!") - << Color::Cyan << rb.m_exception << "\n"; - } else if (rb.m_at & assertType::is_throws_with) { //! OCLINT bitwise - //! operator in - //! conditional - s << Color::Cyan << assertString(rb.m_at) << "( " << rb.m_expr << ", \"" - << rb.m_exception_string << "\" ) " << Color::None - << (rb.m_threw ? (!rb.m_failed ? "threw as expected!" - : "threw a DIFFERENT exception: ") - : "did NOT throw at all!") - << Color::Cyan << rb.m_exception << "\n"; - } else if (rb.m_at & assertType::is_nothrow) { //! OCLINT bitwise operator in - //! conditional - s << (rb.m_threw ? "THREW exception: " : "didn't throw!") << Color::Cyan - << rb.m_exception << "\n"; - } else { - s - << (rb.m_threw - ? "THREW exception: " - : (!rb.m_failed ? 
"is correct!\n" : "is NOT correct!\n")); - if (rb.m_threw) - s << rb.m_exception << "\n"; - else - s << " values: " << assertString(rb.m_at) << "( " << rb.m_decomp - << " )\n"; - } -} - -// TODO: -// - log_message() -// - respond to queries -// - honor remaining options -// - more attributes in tags -struct JUnitReporter : public IReporter { - XmlWriter xml; - std::mutex mutex; - Timer timer; - std::vector deepestSubcaseStackNames; - - struct JUnitTestCaseData { - static std::string getCurrentTimestamp() - { - // Beware, this is not reentrant because of backward compatibility issues - // Also, UTC only, again because of backward compatibility (%z is C++11) - time_t rawtime; - std::time(&rawtime); - auto const timeStampSize = sizeof("2017-01-16T17:06:45Z"); - - std::tm timeInfo; -#ifdef DOCTEST_PLATFORM_WINDOWS - gmtime_s(&timeInfo, &rawtime); -#else // DOCTEST_PLATFORM_WINDOWS - gmtime_r(&rawtime, &timeInfo); -#endif // DOCTEST_PLATFORM_WINDOWS - - char timeStamp[timeStampSize]; - const char* const fmt = "%Y-%m-%dT%H:%M:%SZ"; - - std::strftime(timeStamp, timeStampSize, fmt, &timeInfo); - return std::string(timeStamp); - } - - struct JUnitTestMessage { - JUnitTestMessage( - const std::string& _message, const std::string& _type, - const std::string& _details) - : message(_message), type(_type), details(_details) - { - } - - JUnitTestMessage(const std::string& _message, const std::string& _details) - : message(_message), type(), details(_details) - { - } - - std::string message, type, details; - }; - - struct JUnitTestCase { - JUnitTestCase(const std::string& _classname, const std::string& _name) - : classname(_classname), name(_name), time(0), failures() - { - } - - std::string classname, name; - double time; - std::vector failures, errors; - }; - - void add(const std::string& classname, const std::string& name) - { - testcases.emplace_back(classname, name); - } - - void appendSubcaseNamesToLastTestcase(std::vector nameStack) - { - for (auto& curr : nameStack) - if (curr.size()) - testcases.back().name += std::string("/") + curr.c_str(); - } - - void addTime(double time) - { - if (time < 1e-4) - time = 0; - testcases.back().time = time; - totalSeconds += time; - } - - void addFailure( - const std::string& message, const std::string& type, - const std::string& details) - { - testcases.back().failures.emplace_back(message, type, details); - ++totalFailures; - } - - void addError(const std::string& message, const std::string& details) - { - testcases.back().errors.emplace_back(message, details); - ++totalErrors; - } - - std::vector testcases; - double totalSeconds = 0; - int totalErrors = 0, totalFailures = 0; - }; - - JUnitTestCaseData testCaseData; - - // caching pointers/references to objects of these types - safe to do - const ContextOptions& opt; - const TestCaseData* tc = nullptr; - - JUnitReporter(const ContextOptions& co) : xml(*co.cout), opt(co) {} - - unsigned line(unsigned l) const { return opt.no_line_numbers ? 
0 : l; } - - // ========================================================================================= - // WHAT FOLLOWS ARE OVERRIDES OF THE VIRTUAL METHODS OF THE REPORTER INTERFACE - // ========================================================================================= - - void report_query(const QueryData&) override {} - - void test_run_start() override {} - - void test_run_end(const TestRunStats& p) override - { - // remove .exe extension - mainly to have the same output on UNIX and - // Windows - std::string binary_name = skipPathFromFilename(opt.binary_name.c_str()); -#ifdef DOCTEST_PLATFORM_WINDOWS - if (binary_name.rfind(".exe") != std::string::npos) - binary_name = binary_name.substr(0, binary_name.length() - 4); -#endif // DOCTEST_PLATFORM_WINDOWS - xml.startElement("testsuites"); - xml.startElement("testsuite") - .writeAttribute("name", binary_name) - .writeAttribute("errors", testCaseData.totalErrors) - .writeAttribute("failures", testCaseData.totalFailures) - .writeAttribute("tests", p.numAsserts); - if (opt.no_time_in_output == false) { - xml.writeAttribute("time", testCaseData.totalSeconds); - xml.writeAttribute("timestamp", JUnitTestCaseData::getCurrentTimestamp()); - } - if (opt.no_version == false) - xml.writeAttribute("doctest_version", DOCTEST_VERSION_STR); - - for (const auto& testCase : testCaseData.testcases) { - xml.startElement("testcase") - .writeAttribute("classname", testCase.classname) - .writeAttribute("name", testCase.name); - if (opt.no_time_in_output == false) - xml.writeAttribute("time", testCase.time); - // This is not ideal, but it should be enough to mimic gtest's junit - // output. - xml.writeAttribute("status", "run"); - - for (const auto& failure : testCase.failures) { - xml.scopedElement("failure") - .writeAttribute("message", failure.message) - .writeAttribute("type", failure.type) - .writeText(failure.details, false); - } - - for (const auto& error : testCase.errors) { - xml.scopedElement("error") - .writeAttribute("message", error.message) - .writeText(error.details); - } - - xml.endElement(); - } - xml.endElement(); - xml.endElement(); - } - - void test_case_start(const TestCaseData& in) override - { - testCaseData.add(skipPathFromFilename(in.m_file.c_str()), in.m_name); - timer.start(); - } - - void test_case_reenter(const TestCaseData& in) override - { - testCaseData.addTime(timer.getElapsedSeconds()); - testCaseData.appendSubcaseNamesToLastTestcase(deepestSubcaseStackNames); - deepestSubcaseStackNames.clear(); - - timer.start(); - testCaseData.add(skipPathFromFilename(in.m_file.c_str()), in.m_name); - } - - void test_case_end(const CurrentTestCaseStats&) override - { - testCaseData.addTime(timer.getElapsedSeconds()); - testCaseData.appendSubcaseNamesToLastTestcase(deepestSubcaseStackNames); - deepestSubcaseStackNames.clear(); - } - - void test_case_exception(const TestCaseException& e) override - { - std::lock_guard lock(mutex); - testCaseData.addError("exception", e.error_string.c_str()); - } - - void subcase_start(const SubcaseSignature& in) override - { - deepestSubcaseStackNames.push_back(in.m_name); - } - - void subcase_end() override {} - - void log_assert(const AssertData& rb) override - { - if (!rb.m_failed) // report only failures & ignore the `success` option - return; - - std::lock_guard lock(mutex); - - std::ostringstream os; - os << skipPathFromFilename(rb.m_file) << (opt.gnu_file_line ? ":" : "(") - << line(rb.m_line) << (opt.gnu_file_line ? 
":" : "):") << std::endl; - - fulltext_log_assert_to_stream(os, rb); - log_contexts(os); - testCaseData.addFailure( - rb.m_decomp.c_str(), assertString(rb.m_at), os.str()); - } - - void log_message(const MessageData&) override {} - - void test_case_skipped(const TestCaseData&) override {} - - void log_contexts(std::ostringstream& s) - { - int num_contexts = get_num_active_contexts(); - if (num_contexts) { - auto contexts = get_active_contexts(); - - s << " logged: "; - for (int i = 0; i < num_contexts; ++i) { - s << (i == 0 ? "" : " "); - contexts[i]->stringify(&s); - s << std::endl; - } - } - } -}; - -DOCTEST_REGISTER_REPORTER("junit", 0, JUnitReporter); - -struct Whitespace { - int nrSpaces; - explicit Whitespace(int nr) : nrSpaces(nr) {} -}; - -std::ostream& -operator<<(std::ostream& out, const Whitespace& ws) -{ - if (ws.nrSpaces != 0) - out << std::setw(ws.nrSpaces) << ' '; - return out; -} - -struct ConsoleReporter : public IReporter { - std::ostream& s; - bool hasLoggedCurrentTestStart; - std::vector subcasesStack; - size_t currentSubcaseLevel; - std::mutex mutex; - - // caching pointers/references to objects of these types - safe to do - const ContextOptions& opt; - const TestCaseData* tc; - - ConsoleReporter(const ContextOptions& co) : s(*co.cout), opt(co) {} - - ConsoleReporter(const ContextOptions& co, std::ostream& ostr) - : s(ostr), opt(co) - { - } - - // ========================================================================================= - // WHAT FOLLOWS ARE HELPERS USED BY THE OVERRIDES OF THE VIRTUAL METHODS OF - // THE INTERFACE - // ========================================================================================= - - void separator_to_stream() - { - s << Color::Yellow - << "=====================================================================" - "==========" - "\n"; - } - - const char* getSuccessOrFailString( - bool success, assertType::Enum at, const char* success_str) - { - if (success) - return success_str; - return failureString(at); - } - - Color::Enum getSuccessOrFailColor(bool success, assertType::Enum at) - { - return success ? Color::BrightGreen - : (at & assertType::is_warn) ? Color::Yellow - : Color::Red; - } - - void successOrFailColoredStringToStream( - bool success, assertType::Enum at, const char* success_str = "SUCCESS") - { - s << getSuccessOrFailColor(success, at) - << getSuccessOrFailString(success, at, success_str) << ": "; - } - - void log_contexts() - { - int num_contexts = get_num_active_contexts(); - if (num_contexts) { - auto contexts = get_active_contexts(); - - s << Color::None << " logged: "; - for (int i = 0; i < num_contexts; ++i) { - s << (i == 0 ? "" : " "); - contexts[i]->stringify(&s); - s << "\n"; - } - } - - s << "\n"; - } - - // this was requested to be made virtual so users could override it - virtual void file_line_to_stream( - const char* file, int line, const char* tail = "") - { - s << Color::LightGrey << skipPathFromFilename(file) - << (opt.gnu_file_line ? ":" : "(") - << (opt.no_line_numbers - ? 0 - : line) // 0 or the real num depending on the option - << (opt.gnu_file_line ? 
":" : "):") << tail; - } - - void logTestStart() - { - if (hasLoggedCurrentTestStart) - return; - - separator_to_stream(); - file_line_to_stream(tc->m_file.c_str(), tc->m_line, "\n"); - if (tc->m_description) - s << Color::Yellow << "DESCRIPTION: " << Color::None << tc->m_description - << "\n"; - if (tc->m_test_suite && tc->m_test_suite[0] != '\0') - s << Color::Yellow << "TEST SUITE: " << Color::None << tc->m_test_suite - << "\n"; - if (strncmp(tc->m_name, " Scenario:", 11) != 0) - s << Color::Yellow << "TEST CASE: "; - s << Color::None << tc->m_name << "\n"; - - for (size_t i = 0; i < currentSubcaseLevel; ++i) { - if (subcasesStack[i].m_name[0] != '\0') - s << " " << subcasesStack[i].m_name << "\n"; - } - - if (currentSubcaseLevel != subcasesStack.size()) { - s << Color::Yellow - << "\nDEEPEST SUBCASE STACK REACHED (DIFFERENT FROM THE CURRENT ONE):\n" - << Color::None; - for (size_t i = 0; i < subcasesStack.size(); ++i) { - if (subcasesStack[i].m_name[0] != '\0') - s << " " << subcasesStack[i].m_name << "\n"; - } - } - - s << "\n"; - - hasLoggedCurrentTestStart = true; - } - - void printVersion() - { - if (opt.no_version == false) - s << Color::Cyan << "[doctest] " << Color::None << "doctest version is \"" - << DOCTEST_VERSION_STR << "\"\n"; - } - - void printIntro() - { - if (opt.no_intro == false) { - printVersion(); - s << Color::Cyan << "[doctest] " << Color::None - << "run with \"--" DOCTEST_OPTIONS_PREFIX_DISPLAY - "help\" for options\n"; - } - } - - void printHelp() - { - int sizePrefixDisplay = - static_cast(strlen(DOCTEST_OPTIONS_PREFIX_DISPLAY)); - printVersion(); - // clang-format off - s << Color::Cyan << "[doctest]\n" << Color::None; - s << Color::Cyan << "[doctest] " << Color::None; - s << "boolean values: \"1/on/yes/true\" or \"0/off/no/false\"\n"; - s << Color::Cyan << "[doctest] " << Color::None; - s << "filter values: \"str1,str2,str3\" (comma separated strings)\n"; - s << Color::Cyan << "[doctest]\n" << Color::None; - s << Color::Cyan << "[doctest] " << Color::None; - s << "filters use wildcards for matching strings\n"; - s << Color::Cyan << "[doctest] " << Color::None; - s << "something passes a filter if any of the strings in a filter matches\n"; -#ifndef DOCTEST_CONFIG_NO_UNPREFIXED_OPTIONS - s << Color::Cyan << "[doctest]\n" << Color::None; - s << Color::Cyan << "[doctest] " << Color::None; - s << "ALL FLAGS, OPTIONS AND FILTERS ALSO AVAILABLE WITH A \"" DOCTEST_CONFIG_OPTIONS_PREFIX "\" PREFIX!!!\n"; -#endif - s << Color::Cyan << "[doctest]\n" << Color::None; - s << Color::Cyan << "[doctest] " << Color::None; - s << "Query flags - the program quits after them. 
Available:\n\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "?, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "help, -" DOCTEST_OPTIONS_PREFIX_DISPLAY "h " - << Whitespace(sizePrefixDisplay*0) << "prints this message\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "v, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "version " - << Whitespace(sizePrefixDisplay*1) << "prints the version\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "c, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "count " - << Whitespace(sizePrefixDisplay*1) << "prints the number of matching tests\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "ltc, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "list-test-cases " - << Whitespace(sizePrefixDisplay*1) << "lists all matching tests by name\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "lts, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "list-test-suites " - << Whitespace(sizePrefixDisplay*1) << "lists all matching test suites\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "lr, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "list-reporters " - << Whitespace(sizePrefixDisplay*1) << "lists all registered reporters\n\n"; - // ================================================================================== << 79 - s << Color::Cyan << "[doctest] " << Color::None; - s << "The available / options/filters are:\n\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "tc, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "test-case= " - << Whitespace(sizePrefixDisplay*1) << "filters tests by their name\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "tce, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "test-case-exclude= " - << Whitespace(sizePrefixDisplay*1) << "filters OUT tests by their name\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "sf, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "source-file= " - << Whitespace(sizePrefixDisplay*1) << "filters tests by their file\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "sfe, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "source-file-exclude= " - << Whitespace(sizePrefixDisplay*1) << "filters OUT tests by their file\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "ts, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "test-suite= " - << Whitespace(sizePrefixDisplay*1) << "filters tests by their test suite\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "tse, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "test-suite-exclude= " - << Whitespace(sizePrefixDisplay*1) << "filters OUT tests by their test suite\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "sc, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "subcase= " - << Whitespace(sizePrefixDisplay*1) << "filters subcases by their name\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "sce, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "subcase-exclude= " - << Whitespace(sizePrefixDisplay*1) << "filters OUT subcases by their name\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "r, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "reporters= " - << Whitespace(sizePrefixDisplay*1) << "reporters to use (console is default)\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "o, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "out= " - << Whitespace(sizePrefixDisplay*1) << "output filename\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "ob, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "order-by= " - << Whitespace(sizePrefixDisplay*1) << "how the tests should be ordered\n"; - s << Whitespace(sizePrefixDisplay*3) << " - [file/suite/name/rand/none]\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "rs, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "rand-seed= " - << Whitespace(sizePrefixDisplay*1) << "seed for random ordering\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "f, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "first= " - << 
Whitespace(sizePrefixDisplay*1) << "the first test passing the filters to\n"; - s << Whitespace(sizePrefixDisplay*3) << " execute - for range-based execution\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "l, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "last= " - << Whitespace(sizePrefixDisplay*1) << "the last test passing the filters to\n"; - s << Whitespace(sizePrefixDisplay*3) << " execute - for range-based execution\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "aa, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "abort-after= " - << Whitespace(sizePrefixDisplay*1) << "stop after failed assertions\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "scfl,--" DOCTEST_OPTIONS_PREFIX_DISPLAY "subcase-filter-levels= " - << Whitespace(sizePrefixDisplay*1) << "apply filters for the first levels\n"; - s << Color::Cyan << "\n[doctest] " << Color::None; - s << "Bool options - can be used like flags and true is assumed. Available:\n\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "s, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "success= " - << Whitespace(sizePrefixDisplay*1) << "include successful assertions in output\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "cs, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "case-sensitive= " - << Whitespace(sizePrefixDisplay*1) << "filters being treated as case sensitive\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "e, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "exit= " - << Whitespace(sizePrefixDisplay*1) << "exits after the tests finish\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "d, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "duration= " - << Whitespace(sizePrefixDisplay*1) << "prints the time duration of each test\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "m, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "minimal= " - << Whitespace(sizePrefixDisplay*1) << "minimal console output (only failures)\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "q, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "quiet= " - << Whitespace(sizePrefixDisplay*1) << "no console output\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "nt, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-throw= " - << Whitespace(sizePrefixDisplay*1) << "skips exceptions-related assert checks\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "ne, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-exitcode= " - << Whitespace(sizePrefixDisplay*1) << "returns (or exits) always with success\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "nr, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-run= " - << Whitespace(sizePrefixDisplay*1) << "skips all runtime doctest operations\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "ni, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-intro= " - << Whitespace(sizePrefixDisplay*1) << "omit the framework intro in the output\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "nv, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-version= " - << Whitespace(sizePrefixDisplay*1) << "omit the framework version in the output\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "nc, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-colors= " - << Whitespace(sizePrefixDisplay*1) << "disables colors in output\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "fc, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "force-colors= " - << Whitespace(sizePrefixDisplay*1) << "use colors even when not in a tty\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "nb, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-breaks= " - << Whitespace(sizePrefixDisplay*1) << "disables breakpoints in debuggers\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "ns, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-skip= " - << Whitespace(sizePrefixDisplay*1) << "don't skip test cases marked as 
skip\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "gfl, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "gnu-file-line= " - << Whitespace(sizePrefixDisplay*1) << ":n: vs (n): for line numbers in output\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "npf, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-path-filenames= " - << Whitespace(sizePrefixDisplay*1) << "only filenames and no paths in output\n"; - s << " -" DOCTEST_OPTIONS_PREFIX_DISPLAY "nln, --" DOCTEST_OPTIONS_PREFIX_DISPLAY "no-line-numbers= " - << Whitespace(sizePrefixDisplay*1) << "0 instead of real line numbers in output\n"; - // ================================================================================== << 79 - // clang-format on - - s << Color::Cyan << "\n[doctest] " << Color::None; - s << "for more information visit the project documentation\n\n"; - } - - void printRegisteredReporters() - { - printVersion(); - auto printReporters = [this]( - const reporterMap& reporters, const char* type) { - if (reporters.size()) { - s << Color::Cyan << "[doctest] " << Color::None - << "listing all registered " << type << "\n"; - for (auto& curr : reporters) - s << "priority: " << std::setw(5) << curr.first.first - << " name: " << curr.first.second << "\n"; - } - }; - printReporters(getListeners(), "listeners"); - printReporters(getReporters(), "reporters"); - } - - // ========================================================================================= - // WHAT FOLLOWS ARE OVERRIDES OF THE VIRTUAL METHODS OF THE REPORTER INTERFACE - // ========================================================================================= - - void report_query(const QueryData& in) override - { - if (opt.version) { - printVersion(); - } else if (opt.help) { - printHelp(); - } else if (opt.list_reporters) { - printRegisteredReporters(); - } else if (opt.count || opt.list_test_cases) { - if (opt.list_test_cases) { - s << Color::Cyan << "[doctest] " << Color::None - << "listing all test case names\n"; - separator_to_stream(); - } - - for (unsigned i = 0; i < in.num_data; ++i) - s << Color::None << in.data[i]->m_name << "\n"; - - separator_to_stream(); - - s << Color::Cyan << "[doctest] " << Color::None - << "unskipped test cases passing the current filters: " - << g_cs->numTestCasesPassingFilters << "\n"; - - } else if (opt.list_test_suites) { - s << Color::Cyan << "[doctest] " << Color::None - << "listing all test suites\n"; - separator_to_stream(); - - for (unsigned i = 0; i < in.num_data; ++i) - s << Color::None << in.data[i]->m_test_suite << "\n"; - - separator_to_stream(); - - s << Color::Cyan << "[doctest] " << Color::None - << "unskipped test cases passing the current filters: " - << g_cs->numTestCasesPassingFilters << "\n"; - s << Color::Cyan << "[doctest] " << Color::None - << "test suites with unskipped test cases passing the current filters: " - << g_cs->numTestSuitesPassingFilters << "\n"; - } - } - - void test_run_start() override - { - if (!opt.minimal) - printIntro(); - } - - void test_run_end(const TestRunStats& p) override - { - if (opt.minimal && p.numTestCasesFailed == 0) - return; - - separator_to_stream(); - s << std::dec; - - auto totwidth = int(std::ceil(log10( - (std::max( - p.numTestCasesPassingFilters, - static_cast(p.numAsserts))) + - 1))); - auto passwidth = int(std::ceil(log10( - (std::max( - p.numTestCasesPassingFilters - p.numTestCasesFailed, - static_cast(p.numAsserts - p.numAssertsFailed))) + - 1))); - auto failwidth = int(std::ceil(log10( - (std::max( - p.numTestCasesFailed, static_cast(p.numAssertsFailed))) + - 1))); - const bool 
anythingFailed = - p.numTestCasesFailed > 0 || p.numAssertsFailed > 0; - s << Color::Cyan << "[doctest] " << Color::None - << "test cases: " << std::setw(totwidth) << p.numTestCasesPassingFilters - << " | " - << ((p.numTestCasesPassingFilters == 0 || anythingFailed) ? Color::None - : Color::Green) - << std::setw(passwidth) - << p.numTestCasesPassingFilters - p.numTestCasesFailed << " passed" - << Color::None << " | " - << (p.numTestCasesFailed > 0 ? Color::Red : Color::None) - << std::setw(failwidth) << p.numTestCasesFailed << " failed" - << Color::None << " |"; - if (opt.no_skipped_summary == false) { - const int numSkipped = p.numTestCases - p.numTestCasesPassingFilters; - s << " " << (numSkipped == 0 ? Color::None : Color::Yellow) << numSkipped - << " skipped" << Color::None; - } - s << "\n"; - s << Color::Cyan << "[doctest] " << Color::None - << "assertions: " << std::setw(totwidth) << p.numAsserts << " | " - << ((p.numAsserts == 0 || anythingFailed) ? Color::None : Color::Green) - << std::setw(passwidth) << (p.numAsserts - p.numAssertsFailed) - << " passed" << Color::None << " | " - << (p.numAssertsFailed > 0 ? Color::Red : Color::None) - << std::setw(failwidth) << p.numAssertsFailed << " failed" << Color::None - << " |\n"; - s << Color::Cyan << "[doctest] " << Color::None - << "Status: " << (p.numTestCasesFailed > 0 ? Color::Red : Color::Green) - << ((p.numTestCasesFailed > 0) ? "FAILURE!" : "SUCCESS!") << Color::None - << std::endl; - } - - void test_case_start(const TestCaseData& in) override - { - hasLoggedCurrentTestStart = false; - tc = ∈ - subcasesStack.clear(); - currentSubcaseLevel = 0; - } - - void test_case_reenter(const TestCaseData&) override - { - subcasesStack.clear(); - } - - void test_case_end(const CurrentTestCaseStats& st) override - { - if (tc->m_no_output) - return; - - // log the preamble of the test case only if there is something - // else to print - something other than that an assert has failed - if (opt.duration || - (st.failure_flags && - st.failure_flags != TestCaseFailureReason::AssertFailure)) - logTestStart(); - - if (opt.duration) - s << Color::None << std::setprecision(6) << std::fixed << st.seconds - << " s: " << tc->m_name << "\n"; - - if (st.failure_flags & TestCaseFailureReason::Timeout) - s << Color::Red << "Test case exceeded time limit of " - << std::setprecision(6) << std::fixed << tc->m_timeout << "!\n"; - - if (st.failure_flags & TestCaseFailureReason::ShouldHaveFailedButDidnt) { - s << Color::Red - << "Should have failed but didn't! 
Marking it as failed!\n"; - } else if ( - st.failure_flags & TestCaseFailureReason::ShouldHaveFailedAndDid) { - s << Color::Yellow << "Failed as expected so marking it as not failed\n"; - } else if ( - st.failure_flags & TestCaseFailureReason::CouldHaveFailedAndDid) { - s << Color::Yellow << "Allowed to fail so marking it as not failed\n"; - } else if ( - st.failure_flags & TestCaseFailureReason::DidntFailExactlyNumTimes) { - s << Color::Red << "Didn't fail exactly " << tc->m_expected_failures - << " times so marking it as failed!\n"; - } else if ( - st.failure_flags & TestCaseFailureReason::FailedExactlyNumTimes) { - s << Color::Yellow << "Failed exactly " << tc->m_expected_failures - << " times as expected so marking it as not failed!\n"; - } - if (st.failure_flags & TestCaseFailureReason::TooManyFailedAsserts) { - s << Color::Red << "Aborting - too many failed asserts!\n"; - } - s << Color::None; // lgtm [cpp/useless-expression] - } - - void test_case_exception(const TestCaseException& e) override - { - std::lock_guard lock(mutex); - if (tc->m_no_output) - return; - - logTestStart(); - - file_line_to_stream(tc->m_file.c_str(), tc->m_line, " "); - successOrFailColoredStringToStream( - false, e.is_crash ? assertType::is_require : assertType::is_check); - s << Color::Red - << (e.is_crash ? "test case CRASHED: " : "test case THREW exception: ") - << Color::Cyan << e.error_string << "\n"; - - int num_stringified_contexts = get_num_stringified_contexts(); - if (num_stringified_contexts) { - auto stringified_contexts = get_stringified_contexts(); - s << Color::None << " logged: "; - for (int i = num_stringified_contexts; i > 0; --i) { - s << (i == num_stringified_contexts ? "" : " ") - << stringified_contexts[i - 1] << "\n"; - } - } - s << "\n" << Color::None; - } - - void subcase_start(const SubcaseSignature& subc) override - { - subcasesStack.push_back(subc); - ++currentSubcaseLevel; - hasLoggedCurrentTestStart = false; - } - - void subcase_end() override - { - --currentSubcaseLevel; - hasLoggedCurrentTestStart = false; - } - - void log_assert(const AssertData& rb) override - { - if ((!rb.m_failed && !opt.success) || tc->m_no_output) - return; - - std::lock_guard lock(mutex); - - logTestStart(); - - file_line_to_stream(rb.m_file, rb.m_line, " "); - successOrFailColoredStringToStream(!rb.m_failed, rb.m_at); - - fulltext_log_assert_to_stream(s, rb); - - log_contexts(); - } - - void log_message(const MessageData& mb) override - { - if (tc->m_no_output) - return; - - std::lock_guard lock(mutex); - - logTestStart(); - - file_line_to_stream(mb.m_file, mb.m_line, " "); - s << getSuccessOrFailColor(false, mb.m_severity) - << getSuccessOrFailString( - mb.m_severity & assertType::is_warn, mb.m_severity, "MESSAGE") - << ": "; - s << Color::None << mb.m_string << "\n"; - log_contexts(); - } - - void test_case_skipped(const TestCaseData&) override {} -}; - -DOCTEST_REGISTER_REPORTER("console", 0, ConsoleReporter); - -#ifdef DOCTEST_PLATFORM_WINDOWS -struct DebugOutputWindowReporter : public ConsoleReporter { - DOCTEST_THREAD_LOCAL static std::ostringstream oss; - - DebugOutputWindowReporter(const ContextOptions& co) : ConsoleReporter(co, oss) - { - } - -#define DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE(func, type, arg) \ - void func(type arg) override \ - { \ - bool with_col = g_no_colors; \ - g_no_colors = false; \ - ConsoleReporter::func(arg); \ - if (oss.tellp() != std::streampos{}) { \ - DOCTEST_OUTPUT_DEBUG_STRING(oss.str().c_str()); \ - oss.str(""); \ - } \ - g_no_colors = with_col; \ - } - - 
DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - test_run_start, DOCTEST_EMPTY, DOCTEST_EMPTY) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE(test_run_end, const TestRunStats&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - test_case_start, const TestCaseData&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - test_case_reenter, const TestCaseData&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - test_case_end, const CurrentTestCaseStats&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - test_case_exception, const TestCaseException&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - subcase_start, const SubcaseSignature&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - subcase_end, DOCTEST_EMPTY, DOCTEST_EMPTY) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE(log_assert, const AssertData&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE(log_message, const MessageData&, in) - DOCTEST_DEBUG_OUTPUT_REPORTER_OVERRIDE( - test_case_skipped, const TestCaseData&, in) -}; - -DOCTEST_THREAD_LOCAL std::ostringstream DebugOutputWindowReporter::oss; -#endif // DOCTEST_PLATFORM_WINDOWS - -// the implementation of parseOption() -bool -parseOptionImpl( - int argc, const char* const* argv, const char* pattern, String* value) -{ - // going from the end to the beginning and stopping on the first occurrence - // from the end - for (int i = argc; i > 0; --i) { - auto index = i - 1; - auto temp = std::strstr(argv[index], pattern); - if (temp && - (value || - strlen(temp) == - strlen(pattern))) { //! OCLINT prefer early exits and continue - // eliminate matches in which the chars before the option are not '-' - bool noBadCharsFound = true; - auto curr = argv[index]; - while (curr != temp) { - if (*curr++ != '-') { - noBadCharsFound = false; - break; - } - } - if (noBadCharsFound && argv[index][0] == '-') { - if (value) { - // parsing the value of an option - temp += strlen(pattern); - const unsigned len = strlen(temp); - if (len) { - *value = temp; - return true; - } - } else { - // just a flag - no value - return true; - } - } - } - } - return false; -} - -// parses an option and returns the string after the '=' character -bool -parseOption( - int argc, const char* const* argv, const char* pattern, - String* value = nullptr, const String& defaultVal = String()) -{ - if (value) - *value = defaultVal; -#ifndef DOCTEST_CONFIG_NO_UNPREFIXED_OPTIONS - // offset (normally 3 for "dt-") to skip prefix - if (parseOptionImpl( - argc, argv, pattern + strlen(DOCTEST_CONFIG_OPTIONS_PREFIX), value)) - return true; -#endif // DOCTEST_CONFIG_NO_UNPREFIXED_OPTIONS - return parseOptionImpl(argc, argv, pattern, value); -} - -// locates a flag on the command line -bool -parseFlag(int argc, const char* const* argv, const char* pattern) -{ - return parseOption(argc, argv, pattern); -} - -// parses a comma separated list of words after a pattern in one of the -// arguments in argv -bool -parseCommaSepArgs( - int argc, const char* const* argv, const char* pattern, - std::vector& res) -{ - String filtersString; - if (parseOption(argc, argv, pattern, &filtersString)) { - // tokenize with "," as a separator, unless escaped with backslash - std::ostringstream s; - auto flush = [&s, &res]() { - auto string = s.str(); - if (string.size() > 0) { - res.push_back(string.c_str()); - } - s.str(""); - }; - - bool seenBackslash = false; - const char* current = filtersString.c_str(); - const char* end = current + strlen(current); - while (current != end) { - char character = *current++; - if (seenBackslash) { - seenBackslash = false; - if (character == ',') { - s.put(','); - 
continue; - } - s.put('\\'); - } - if (character == '\\') { - seenBackslash = true; - } else if (character == ',') { - flush(); - } else { - s.put(character); - } - } - - if (seenBackslash) { - s.put('\\'); - } - flush(); - return true; - } - return false; -} - -enum optionType { option_bool, option_int }; - -// parses an int/bool option from the command line -bool -parseIntOption( - int argc, const char* const* argv, const char* pattern, optionType type, - int& res) -{ - String parsedValue; - if (!parseOption(argc, argv, pattern, &parsedValue)) - return false; - - if (type == 0) { - // boolean - const char positive[][5] = { - "1", "true", "on", "yes"}; // 5 - strlen("true") + 1 - const char negative[][6] = { - "0", "false", "off", "no"}; // 6 - strlen("false") + 1 - - // if the value matches any of the positive/negative possibilities - for (unsigned i = 0; i < 4; i++) { - if (parsedValue.compare(positive[i], true) == 0) { - res = 1; //! OCLINT parameter reassignment - return true; - } - if (parsedValue.compare(negative[i], true) == 0) { - res = 0; //! OCLINT parameter reassignment - return true; - } - } - } else { - // integer - // TODO: change this to use std::stoi or something else! currently it uses - // undefined behavior - assumes '0' on failed parse... - int theInt = std::atoi(parsedValue.c_str()); // NOLINT - if (theInt != 0) { - res = theInt; //! OCLINT parameter reassignment - return true; - } - } - return false; -} -} // namespace - -Context::Context(int argc, const char* const* argv) - : p(new detail::ContextState) -{ - parseArgs(argc, argv, true); - if (argc) - p->binary_name = argv[0]; -} - -Context::~Context() -{ - if (g_cs == p) - g_cs = nullptr; - delete p; -} - -void -Context::applyCommandLine(int argc, const char* const* argv) -{ - parseArgs(argc, argv); - if (argc) - p->binary_name = argv[0]; -} - -// parses args -void -Context::parseArgs(int argc, const char* const* argv, bool withDefaults) -{ - using namespace detail; - - // clang-format off - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "source-file=", p->filters[0]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "sf=", p->filters[0]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "source-file-exclude=",p->filters[1]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "sfe=", p->filters[1]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "test-suite=", p->filters[2]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "ts=", p->filters[2]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "test-suite-exclude=", p->filters[3]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "tse=", p->filters[3]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "test-case=", p->filters[4]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "tc=", p->filters[4]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "test-case-exclude=", p->filters[5]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "tce=", p->filters[5]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "subcase=", p->filters[6]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "sc=", p->filters[6]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "subcase-exclude=", p->filters[7]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "sce=", p->filters[7]); - parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "reporters=", p->filters[8]); - 
parseCommaSepArgs(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "r=", p->filters[8]); - // clang-format on - - int intRes = 0; - String strRes; - -#define DOCTEST_PARSE_AS_BOOL_OR_FLAG(name, sname, var, default) \ - if (parseIntOption( \ - argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX name "=", option_bool, \ - intRes) || \ - parseIntOption( \ - argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX sname "=", option_bool, \ - intRes)) \ - p->var = static_cast(intRes); \ - else if ( \ - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX name) || \ - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX sname)) \ - p->var = true; \ - else if (withDefaults) \ - p->var = default - -#define DOCTEST_PARSE_INT_OPTION(name, sname, var, default) \ - if (parseIntOption( \ - argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX name "=", option_int, \ - intRes) || \ - parseIntOption( \ - argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX sname "=", option_int, \ - intRes)) \ - p->var = intRes; \ - else if (withDefaults) \ - p->var = default - -#define DOCTEST_PARSE_STR_OPTION(name, sname, var, default) \ - if (parseOption( \ - argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX name "=", &strRes, \ - default) || \ - parseOption( \ - argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX sname "=", &strRes, \ - default) || \ - withDefaults) \ - p->var = strRes - - // clang-format off - DOCTEST_PARSE_STR_OPTION("out", "o", out, ""); - DOCTEST_PARSE_STR_OPTION("order-by", "ob", order_by, "file"); - DOCTEST_PARSE_INT_OPTION("rand-seed", "rs", rand_seed, 0); - - DOCTEST_PARSE_INT_OPTION("first", "f", first, 0); - DOCTEST_PARSE_INT_OPTION("last", "l", last, UINT_MAX); - - DOCTEST_PARSE_INT_OPTION("abort-after", "aa", abort_after, 0); - DOCTEST_PARSE_INT_OPTION("subcase-filter-levels", "scfl", subcase_filter_levels, INT_MAX); - - DOCTEST_PARSE_AS_BOOL_OR_FLAG("success", "s", success, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("case-sensitive", "cs", case_sensitive, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("exit", "e", exit, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("duration", "d", duration, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("minimal", "m", minimal, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("quiet", "q", quiet, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-throw", "nt", no_throw, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-exitcode", "ne", no_exitcode, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-run", "nr", no_run, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-intro", "ni", no_intro, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-version", "nv", no_version, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-colors", "nc", no_colors, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("force-colors", "fc", force_colors, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-breaks", "nb", no_breaks, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-skip", "ns", no_skip, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("gnu-file-line", "gfl", gnu_file_line, !bool(DOCTEST_MSVC)); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-path-filenames", "npf", no_path_in_filenames, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-line-numbers", "nln", no_line_numbers, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-debug-output", "ndo", no_debug_output, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-skipped-summary", "nss", no_skipped_summary, false); - DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-time-in-output", "ntio", no_time_in_output, false); - // clang-format on - - if (withDefaults) { - p->help = false; - p->version = false; - p->count = false; - p->list_test_cases = false; - p->list_test_suites = false; - p->list_reporters = false; - } - if (parseFlag(argc, 
argv, DOCTEST_CONFIG_OPTIONS_PREFIX "help") || - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "h") || - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "?")) { - p->help = true; - p->exit = true; - } - if (parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "version") || - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "v")) { - p->version = true; - p->exit = true; - } - if (parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "count") || - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "c")) { - p->count = true; - p->exit = true; - } - if (parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "list-test-cases") || - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "ltc")) { - p->list_test_cases = true; - p->exit = true; - } - if (parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "list-test-suites") || - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "lts")) { - p->list_test_suites = true; - p->exit = true; - } - if (parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "list-reporters") || - parseFlag(argc, argv, DOCTEST_CONFIG_OPTIONS_PREFIX "lr")) { - p->list_reporters = true; - p->exit = true; - } -} - -// allows the user to add procedurally to the filters from the command line -void -Context::addFilter(const char* filter, const char* value) -{ - setOption(filter, value); -} - -// allows the user to clear all filters from the command line -void -Context::clearFilters() -{ - for (auto& curr : p->filters) curr.clear(); -} - -// allows the user to override procedurally the bool options from the command -// line -void -Context::setOption(const char* option, bool value) -{ - setOption(option, value ? "true" : "false"); -} - -// allows the user to override procedurally the int options from the command -// line -void -Context::setOption(const char* option, int value) -{ - setOption(option, toString(value).c_str()); - // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) -} - -// allows the user to override procedurally the string options from the command -// line -void -Context::setOption(const char* option, const char* value) -{ - auto argv = String("-") + option + "=" + value; - auto lvalue = argv.c_str(); - parseArgs(1, &lvalue); -} - -// users should query this in their main() and exit the program if true -bool -Context::shouldExit() -{ - return p->exit; -} - -void -Context::setAsDefaultForAssertsOutOfTestCases() -{ - g_cs = p; -} - -void -Context::setAssertHandler(detail::assert_handler ah) -{ - p->ah = ah; -} - -void -Context::setCout(std::ostream* out) -{ - p->cout = out; -} - -static class DiscardOStream : public std::ostream { - private: - class : public std::streambuf { - private: - // allowing some buffering decreases the amount of calls to overflow - char buf[1024]; - - protected: - std::streamsize xsputn(const char_type*, std::streamsize count) override - { - return count; - } - - int_type overflow(int_type ch) override - { - setp(std::begin(buf), std::end(buf)); - return traits_type::not_eof(ch); - } - } discardBuf; - - public: - DiscardOStream() : std::ostream(&discardBuf) {} -} discardOut; - -// the main function that does all the filtering and test running -int -Context::run() -{ - using namespace detail; - - // save the old context state in case such was setup - for using asserts out - // of a testing context - auto old_cs = g_cs; - // this is the current contest - g_cs = p; - is_running_in_test = true; - - g_no_colors = p->no_colors; - p->resetRunData(); - - std::fstream fstr; - if (p->cout == nullptr) { - if (p->quiet) { - p->cout = 
&discardOut; - } else if (p->out.size()) { - // to a file if specified - fstr.open(p->out.c_str(), std::fstream::out); - p->cout = &fstr; - } else { - // stdout by default - p->cout = &std::cout; - } - } - - FatalConditionHandler::allocateAltStackMem(); - - auto cleanup_and_return = [&]() { - FatalConditionHandler::freeAltStackMem(); - - if (fstr.is_open()) - fstr.close(); - - // restore context - g_cs = old_cs; - is_running_in_test = false; - - // we have to free the reporters which were allocated when the run started - for (auto& curr : p->reporters_currently_used) delete curr; - p->reporters_currently_used.clear(); - - if (p->numTestCasesFailed && !p->no_exitcode) - return EXIT_FAILURE; - return EXIT_SUCCESS; - }; - - // setup default reporter if none is given through the command line - if (p->filters[8].empty()) - p->filters[8].push_back("console"); - - // check to see if any of the registered reporters has been selected - for (auto& curr : getReporters()) { - if (matchesAny( - curr.first.second.c_str(), p->filters[8], false, p->case_sensitive)) - p->reporters_currently_used.push_back(curr.second(*g_cs)); - } - - // TODO: check if there is nothing in reporters_currently_used - - // prepend all listeners - for (auto& curr : getListeners()) - p->reporters_currently_used.insert( - p->reporters_currently_used.begin(), curr.second(*g_cs)); - -#ifdef DOCTEST_PLATFORM_WINDOWS - if (isDebuggerActive() && p->no_debug_output == false) - p->reporters_currently_used.push_back(new DebugOutputWindowReporter(*g_cs)); -#endif // DOCTEST_PLATFORM_WINDOWS - - // handle version, help and no_run - if (p->no_run || p->version || p->help || p->list_reporters) { - DOCTEST_ITERATE_THROUGH_REPORTERS(report_query, QueryData()); - - return cleanup_and_return(); - } - - std::vector testArray; - for (auto& curr : getRegisteredTests()) testArray.push_back(&curr); - p->numTestCases = testArray.size(); - - // sort the collected records - if (!testArray.empty()) { - if (p->order_by.compare("file", true) == 0) { - std::sort(testArray.begin(), testArray.end(), fileOrderComparator); - } else if (p->order_by.compare("suite", true) == 0) { - std::sort(testArray.begin(), testArray.end(), suiteOrderComparator); - } else if (p->order_by.compare("name", true) == 0) { - std::sort(testArray.begin(), testArray.end(), nameOrderComparator); - } else if (p->order_by.compare("rand", true) == 0) { - std::srand(p->rand_seed); - - // random_shuffle implementation - const auto first = &testArray[0]; - for (size_t i = testArray.size() - 1; i > 0; --i) { - int idxToSwap = std::rand() % (i + 1); // NOLINT - - const auto temp = first[i]; - - first[i] = first[idxToSwap]; - first[idxToSwap] = temp; - } - } else if (p->order_by.compare("none", true) == 0) { - // means no sorting - beneficial for death tests which call into the - // executable with a specific test case in mind - we don't want to slow - // down the startup times - } - } - - std::set testSuitesPassingFilt; - - bool query_mode = p->count || p->list_test_cases || p->list_test_suites; - std::vector queryResults; - - if (!query_mode) - DOCTEST_ITERATE_THROUGH_REPORTERS(test_run_start, DOCTEST_EMPTY); - - // invoke the registered functions if they match the filter criteria (or just - // count them) - for (auto& curr : testArray) { - const auto& tc = *curr; - - bool skip_me = false; - if (tc.m_skip && !p->no_skip) - skip_me = true; - - if (!matchesAny(tc.m_file.c_str(), p->filters[0], true, p->case_sensitive)) - skip_me = true; - if (matchesAny(tc.m_file.c_str(), p->filters[1], false, 
p->case_sensitive)) - skip_me = true; - if (!matchesAny(tc.m_test_suite, p->filters[2], true, p->case_sensitive)) - skip_me = true; - if (matchesAny(tc.m_test_suite, p->filters[3], false, p->case_sensitive)) - skip_me = true; - if (!matchesAny(tc.m_name, p->filters[4], true, p->case_sensitive)) - skip_me = true; - if (matchesAny(tc.m_name, p->filters[5], false, p->case_sensitive)) - skip_me = true; - - if (!skip_me) - p->numTestCasesPassingFilters++; - - // skip the test if it is not in the execution range - if ((p->last < p->numTestCasesPassingFilters && p->first <= p->last) || - (p->first > p->numTestCasesPassingFilters)) - skip_me = true; - - if (skip_me) { - if (!query_mode) - DOCTEST_ITERATE_THROUGH_REPORTERS(test_case_skipped, tc); - continue; - } - - // do not execute the test if we are to only count the number of filter - // passing tests - if (p->count) - continue; - - // print the name of the test and don't execute it - if (p->list_test_cases) { - queryResults.push_back(&tc); - continue; - } - - // print the name of the test suite if not done already and don't execute it - if (p->list_test_suites) { - if ((testSuitesPassingFilt.count(tc.m_test_suite) == 0) && - tc.m_test_suite[0] != '\0') { - queryResults.push_back(&tc); - testSuitesPassingFilt.insert(tc.m_test_suite); - p->numTestSuitesPassingFilters++; - } - continue; - } - - // execute the test if it passes all the filtering - { - p->currentTest = &tc; - - p->failure_flags = TestCaseFailureReason::None; - p->seconds = 0; - - // reset atomic counters - p->numAssertsFailedCurrentTest_atomic = 0; - p->numAssertsCurrentTest_atomic = 0; - - p->subcasesPassed.clear(); - - DOCTEST_ITERATE_THROUGH_REPORTERS(test_case_start, tc); - - p->timer.start(); - - bool run_test = true; - - do { - // reset some of the fields for subcases (except for the set of fully - // passed ones) - p->should_reenter = false; - p->subcasesCurrentMaxLevel = 0; - p->subcasesStack.clear(); - - p->shouldLogCurrentException = true; - - // reset stuff for logging with INFO() - p->stringifiedContexts.clear(); - -#ifndef DOCTEST_CONFIG_NO_EXCEPTIONS - try { -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS - // MSVC 2015 diagnoses fatalConditionHandler as unused (because - // reset() is a static method) - DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH( - 4101) // unreferenced local variable - FatalConditionHandler fatalConditionHandler; // Handle signals - // execute the test - tc.m_test(); - fatalConditionHandler.reset(); - DOCTEST_MSVC_SUPPRESS_WARNING_POP -#ifndef DOCTEST_CONFIG_NO_EXCEPTIONS - } - catch (const TestFailureException&) { - p->failure_flags |= TestCaseFailureReason::AssertFailure; - } - catch (...) 
{ - DOCTEST_ITERATE_THROUGH_REPORTERS( - test_case_exception, {translateActiveException(), false}); - p->failure_flags |= TestCaseFailureReason::Exception; - } -#endif // DOCTEST_CONFIG_NO_EXCEPTIONS - - // exit this loop if enough assertions have failed - even if there are - // more subcases - if (p->abort_after > 0 && - p->numAssertsFailed + p->numAssertsFailedCurrentTest_atomic >= - p->abort_after) { - run_test = false; - p->failure_flags |= TestCaseFailureReason::TooManyFailedAsserts; - } - - if (p->should_reenter && run_test) - DOCTEST_ITERATE_THROUGH_REPORTERS(test_case_reenter, tc); - if (!p->should_reenter) - run_test = false; - } while (run_test); - - p->finalizeTestCaseData(); - - DOCTEST_ITERATE_THROUGH_REPORTERS(test_case_end, *g_cs); - - p->currentTest = nullptr; - - // stop executing tests if enough assertions have failed - if (p->abort_after > 0 && p->numAssertsFailed >= p->abort_after) - break; - } - } - - if (!query_mode) { - DOCTEST_ITERATE_THROUGH_REPORTERS(test_run_end, *g_cs); - } else { - QueryData qdata; - qdata.run_stats = g_cs; - qdata.data = queryResults.data(); - qdata.num_data = unsigned(queryResults.size()); - DOCTEST_ITERATE_THROUGH_REPORTERS(report_query, qdata); - } - - return cleanup_and_return(); -} - -IReporter::~IReporter() = default; - -int -IReporter::get_num_active_contexts() -{ - return detail::g_infoContexts.size(); -} -const IContextScope* const* -IReporter::get_active_contexts() -{ - return get_num_active_contexts() ? &detail::g_infoContexts[0] : nullptr; -} - -int -IReporter::get_num_stringified_contexts() -{ - return detail::g_cs->stringifiedContexts.size(); -} -const String* -IReporter::get_stringified_contexts() -{ - return get_num_stringified_contexts() ? &detail::g_cs->stringifiedContexts[0] - : nullptr; -} - -namespace detail { -void -registerReporterImpl( - const char* name, int priority, reporterCreatorFunc c, bool isReporter) -{ - if (isReporter) - getReporters().insert( - reporterMap::value_type(reporterMap::key_type(priority, name), c)); - else - getListeners().insert( - reporterMap::value_type(reporterMap::key_type(priority, name), c)); -} -} // namespace detail - -} // namespace doctest - -#endif // DOCTEST_CONFIG_DISABLE - -#ifdef DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN -DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH( - 4007) // 'function' : must be 'attribute' - see issue #182 -int -main(int argc, char** argv) -{ - return doctest::Context(argc, argv).run(); -} -DOCTEST_MSVC_SUPPRESS_WARNING_POP -#endif // DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN - -DOCTEST_CLANG_SUPPRESS_WARNING_POP -DOCTEST_MSVC_SUPPRESS_WARNING_POP -DOCTEST_GCC_SUPPRESS_WARNING_POP - -DOCTEST_SUPPRESS_COMMON_WARNINGS_POP - -#endif // DOCTEST_LIBRARY_IMPLEMENTATION -#endif // DOCTEST_CONFIG_IMPLEMENT diff --git a/src/c++/perf_analyzer/fifo_ctx_id_tracker.h b/src/c++/perf_analyzer/fifo_ctx_id_tracker.h deleted file mode 100644 index 750fc63b8..000000000 --- a/src/c++/perf_analyzer/fifo_ctx_id_tracker.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "base_queue_ctx_id_tracker.h" - -namespace triton { namespace perfanalyzer { - -// Context ID Tracker that reuses IDs in a roughly round-robin manner using a -// FIFO -// -class FifoCtxIdTracker : public BaseQueueCtxIdTracker { - public: - FifoCtxIdTracker() = default; - void Reset(size_t count) override - { - Clear(); - - for (size_t i = 0; i < count; ++i) { - free_ctx_ids_.push(i); - } - } -}; - -}}; // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/genai-perf/.gitignore b/src/c++/perf_analyzer/genai-perf/.gitignore deleted file mode 100644 index d4f588edf..000000000 --- a/src/c++/perf_analyzer/genai-perf/.gitignore +++ /dev/null @@ -1 +0,0 @@ -artifacts/ diff --git a/src/c++/perf_analyzer/genai-perf/README.md b/src/c++/perf_analyzer/genai-perf/README.md deleted file mode 100644 index 53e510541..000000000 --- a/src/c++/perf_analyzer/genai-perf/README.md +++ /dev/null @@ -1,558 +0,0 @@ - - -# GenAI-Perf - -GenAI-Perf is a command line tool for measuring the throughput and latency of -generative AI models as served through an inference server. -For large language models (LLMs), GenAI-Perf provides metrics such as -[output token throughput](#output_token_throughput_metric), -[time to first token](#time_to_first_token_metric), -[inter token latency](#inter_token_latency_metric), and -[request throughput](#request_throughput_metric). -For a full list of metrics please see the [Metrics section](#metrics). - -Users specify a model name, an inference server URL, the type of inputs to use -(synthetic or from dataset), and the type of load to generate (number of -concurrent requests, request rate). - -GenAI-Perf generates the specified load, measures the performance of the -inference server and reports the metrics in a simple table as console output. -The tool also logs all results in a csv and json file that can be used to derive -additional metrics and visualizations. The inference server must already be -running when GenAI-Perf is run. 
- -You can use GenAI-Perf to run performance benchmarks on -- [Large Language Models](docs/tutorial.md) -- [Vision Language Models](docs/multi_modal.md) -- [Embedding Models](docs/embeddings.md) -- [Ranking Models](docs/rankings.md) -- [Multiple LoRA Adapters](docs/lora.md) - -> [!Note] -> GenAI-Perf is currently in early release and under rapid development. While we -> will try to remain consistent, command line options and functionality are -> subject to change as the tool matures. - -
- - - -## Installation - -The easiest way to install GenAI-Perf is through -[Triton Server SDK container](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver). -Install the latest release using the following command: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - -docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# Check out genai_perf command inside the container: -genai-perf --help -``` - -
- -Alternatively, to install from source: - -Since GenAI-Perf depends on Perf Analyzer, -you'll need to install the Perf Analyzer binary: - -### Install Perf Analyzer (Ubuntu, Python 3.8+) - -**NOTE**: you must already have CUDA 12 installed -(checkout the [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html)). - -```bash -pip install tritonclient - -apt update && apt install -y --no-install-recommends libb64-0d libcurl4 -``` - -You can also build Perf Analyzer [from source](../docs/install.md#build-from-source) as well. - -### Install GenAI-Perf from source - -```bash -git clone https://github.com/triton-inference-server/client.git && cd client - -pip install -e . -``` - -
- -
- - - -## Quick Start - -In this quick start, we will use GenAI-Perf to run performance benchmarking on -the GPT-2 model running on Triton Inference Server with a TensorRT-LLM engine. - -### Serve GPT-2 TensorRT-LLM model using Triton CLI - -You can follow the [quickstart guide](https://github.com/triton-inference-server/triton_cli?tab=readme-ov-file#serving-a-trt-llm-model) -on Triton CLI github repo to run GPT-2 model locally. -The full instructions are copied below for convenience: - -```bash -# This container comes with all of the dependencies for building TRT-LLM engines -# and serving the engine with Triton Inference Server. -docker run -ti \ - --gpus all \ - --network=host \ - --shm-size=1g --ulimit memlock=-1 \ - -v /tmp:/tmp \ - -v ${HOME}/models:/root/models \ - -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - nvcr.io/nvidia/tritonserver:24.05-trtllm-python-py3 - -# Install the Triton CLI -pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.8 - -# Build TRT LLM engine and generate a Triton model repository pointing at it -triton remove -m all -triton import -m gpt2 --backend tensorrtllm - -# Start Triton pointing at the default model repository -triton start -``` - -### Running GenAI-Perf - -Now we can run GenAI-Perf from Triton Inference Server SDK container: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - -docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# Run GenAI-Perf in the container: -genai-perf profile \ - -m gpt2 \ - --service-kind triton \ - --backend tensorrtllm \ - --num-prompts 100 \ - --random-seed 123 \ - --synthetic-input-tokens-mean 200 \ - --synthetic-input-tokens-stddev 0 \ - --streaming \ - --output-tokens-mean 100 \ - --output-tokens-stddev 0 \ - --output-tokens-mean-deterministic \ - --tokenizer hf-internal-testing/llama-tokenizer \ - --concurrency 1 \ - --measurement-interval 4000 \ - --profile-export-file my_profile_export.json \ - --url localhost:8001 -``` - -Example output: - -``` - LLM Metrics -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩ -│ Time to first token (ms) │ 11.70 │ 9.88 │ 17.21 │ 14.35 │ 12.01 │ 11.87 │ -│ Inter token latency (ms) │ 1.46 │ 1.08 │ 1.89 │ 1.87 │ 1.62 │ 1.52 │ -│ Request latency (ms) │ 161.24 │ 153.45 │ 200.74 │ 200.66 │ 179.43 │ 162.23 │ -│ Output sequence length │ 103.39 │ 95.00 │ 134.00 │ 120.08 │ 107.30 │ 105.00 │ -│ Input sequence length │ 200.01 │ 200.00 │ 201.00 │ 200.13 │ 200.00 │ 200.00 │ -└──────────────────────────┴────────┴────────┴────────┴────────┴────────┴────────┘ -Output token throughput (per sec): 635.61 -Request throughput (per sec): 6.15 -``` - -See [Tutorial](docs/tutorial.md) for additional examples. - -
- - - -## Visualization - -GenAI-Perf can also generate various plots that visualize the performance of the -current profile run. This is disabled by default but users can easily enable it -by passing the `--generate-plots` option when running the benchmark: - -```bash -genai-perf profile \ - -m gpt2 \ - --service-kind triton \ - --backend tensorrtllm \ - --streaming \ - --concurrency 1 \ - --generate-plots -``` - -This will generate a [set of default plots](docs/compare.md#example-plots) such as: -- Time to first token (TTFT) analysis -- Request latency analysis -- TTFT vs Input sequence lengths -- Inter token latencies vs Token positions -- Input sequence lengths vs Output sequence lengths - - -### Using `compare` Subcommand to Visualize Multiple Runs - -The `compare` subcommand in GenAI-Perf facilitates users in comparing multiple -profile runs and visualizing the differences through plots. - -#### Usage -Assuming the user possesses two profile export JSON files, -namely `profile1.json` and `profile2.json`, -they can execute the `compare` subcommand using the `--files` option: - -```bash -genai-perf compare --files profile1.json profile2.json -``` - -Executing the above command will perform the following actions under the -`compare` directory: -1. Generate a YAML configuration file (e.g. `config.yaml`) containing the -metadata for each plot generated during the comparison process. -2. Automatically generate the [default set of plots](docs/compare.md#example-plots) -(e.g. TTFT vs. Input Sequence Lengths) that compare the two profile runs. - -``` -compare -├── config.yaml -├── distribution_of_input_sequence_lengths_to_output_sequence_lengths.jpeg -├── request_latency.jpeg -├── time_to_first_token.jpeg -├── time_to_first_token_vs_input_sequence_lengths.jpeg -├── token-to-token_latency_vs_output_token_position.jpeg -└── ... -``` - -#### Customization -Users have the flexibility to iteratively modify the generated YAML configuration -file to suit their specific requirements. -They can make alterations to the plots according to their preferences and execute -the command with the `--config` option followed by the path to the modified -configuration file: - -```bash -genai-perf compare --config compare/config.yaml -``` - -This command will regenerate the plots based on the updated configuration settings, -enabling users to refine the visual representation of the comparison results as -per their needs. - -See [Compare documentation](docs/compare.md) for more details. - -
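-If you prefer to script the customization step described above instead of
-editing the YAML by hand, a minimal sketch could look like the following. It
-assumes PyYAML is installed and that the generated config lives at
-`compare/config.yaml`; the edited fields are examples only:
-
-```python
-# tweak_config.py -- sketch: programmatically adjust the generated plot config
-# before re-running `genai-perf compare --config compare/config.yaml`.
-import yaml  # assumes: pip install pyyaml
-
-with open("compare/config.yaml") as f:
-    config = yaml.safe_load(f)
-
-# Example tweaks: rename the first plot and make it shorter.
-config["plot1"]["title"] = "Time to First Token (run A vs. run B)"
-config["plot1"]["height"] = 500
-
-with open("compare/config.yaml", "w") as f:
-    yaml.safe_dump(config, f, sort_keys=False)
-```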
-
-
-## Model Inputs
-
-GenAI-Perf supports model input prompts from either synthetically generated
-inputs, or from the HuggingFace
-[OpenOrca](https://huggingface.co/datasets/Open-Orca/OpenOrca) or
-[CNN_DailyMail](https://huggingface.co/datasets/cnn_dailymail) datasets. This is
-specified using the `--input-dataset` CLI option.
-
-When the dataset is synthetic, you can specify the following options:
-* `--num-prompts <int>`: The number of unique prompts to generate as stimulus, >= 1.
-* `--synthetic-input-tokens-mean <int>`: The mean number of tokens in the
-  generated prompts when using synthetic data, >= 1.
-* `--synthetic-input-tokens-stddev <int>`: The standard deviation of the number
-  of tokens in the generated prompts when using synthetic data, >= 0.
-* `--random-seed <int>`: The seed used to generate random values, >= 0.
-
-When the dataset comes from HuggingFace, you can specify the following
-options:
-* `--input-dataset {openorca,cnn_dailymail}`: HuggingFace dataset to use for
-  benchmarking.
-* `--num-prompts <int>`: The number of unique prompts to generate as stimulus, >= 1.
-
-When the dataset comes from a file, you can specify the following
-options:
-* `--input-file <path>`: The input file containing the prompts to
-  use for benchmarking as JSON objects (see the sketch at the end of this section).
-
-For any dataset, you can specify the following options:
-* `--output-tokens-mean <int>`: The mean number of tokens in each output. Ensure
-  the `--tokenizer` value is set correctly, >= 1.
-* `--output-tokens-stddev <int>`: The standard deviation of the number of tokens
-  in each output. This is only used when `--output-tokens-mean` is provided, >= 1.
-* `--output-tokens-mean-deterministic`: When using `--output-tokens-mean`, this
-  flag can be set to improve precision by setting the minimum number of tokens
-  equal to the requested number of tokens. This is currently supported with the
-  Triton service-kind. Note that there is still some variability in the
-  requested number of output tokens, but GenAI-Perf makes a best effort
-  with your model to get the right number of output tokens.
-
-You can optionally set additional model inputs with the following option:
-* `--extra-inputs <str>:<value>`: An additional input for use with the
-  model with a single value, such as `stream:true` or `max_tokens:5`. This
-  flag can be repeated to supply multiple extra inputs.
-
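-As noted under `--input-file` above, here is a minimal sketch of how an input
-file in the `text_input` JSONL format could be prepared. The prompts and file
-name are illustrative only:
-
-```python
-# make_inputs.py -- sketch: write a JSONL file usable with --input-file.
-# Each line is a JSON object with a "text_input" field.
-import json
-
-prompts = [
-    "Summarize the plot of Hamlet in two sentences.",
-    "What is the capital of Australia?",
-]
-
-with open("inputs.jsonl", "w") as f:
-    for prompt in prompts:
-        f.write(json.dumps({"text_input": prompt}) + "\n")
-```
-
-The resulting file can then be passed to GenAI-Perf with
-`--input-file inputs.jsonl`.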
- - - -## Metrics - -GenAI-Perf collects a diverse set of metrics that captures the performance of -the inference server. - -| Metric | Description | Aggregations | -| - | - | - | -| Time to First Token | Time between when a request is sent and when its first response is received, one value per request in benchmark | Avg, min, max, p99, p90, p75 | -| Inter Token Latency | Time between intermediate responses for a single request divided by the number of generated tokens of the latter response, one value per response per request in benchmark | Avg, min, max, p99, p90, p75 | -| Request Latency | Time between when a request is sent and when its final response is received, one value per request in benchmark | Avg, min, max, p99, p90, p75 | -| Output Sequence Length | Total number of output tokens of a request, one value per request in benchmark | Avg, min, max, p99, p90, p75 | -| Input Sequence Length | Total number of input tokens of a request, one value per request in benchmark | Avg, min, max, p99, p90, p75 | -| Output Token Throughput | Total number of output tokens from benchmark divided by benchmark duration | None–one value per benchmark | -| Request Throughput | Number of final responses from benchmark divided by benchmark duration | None–one value per benchmark | - -
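-To make the definitions in the table above concrete, the following is a rough
-sketch of how these aggregations could be computed from per-request
-timestamps. The data layout shown here is illustrative only and is not the
-schema of the actual profile export file:
-
-```python
-# metrics_sketch.py -- illustrative computation of the metrics defined above.
-from statistics import mean
-
-# Hypothetical per-request data: request send time, timestamps of each
-# streamed response, and tokens generated per response (times in seconds).
-requests = [
-    {"send": 0.00, "responses": [0.012, 0.014, 0.017], "tokens": [1, 1, 2]},
-    {"send": 0.01, "responses": [0.023, 0.026, 0.030], "tokens": [1, 2, 1]},
-]
-
-ttft = [r["responses"][0] - r["send"] for r in requests]
-request_latency = [r["responses"][-1] - r["send"] for r in requests]
-output_sequence_length = [sum(r["tokens"]) for r in requests]
-
-# Inter token latency: gap between consecutive responses divided by the
-# number of tokens generated in the latter response.
-itl = [
-    (r["responses"][i] - r["responses"][i - 1]) / r["tokens"][i]
-    for r in requests
-    for i in range(1, len(r["responses"]))
-]
-
-duration = max(r["responses"][-1] for r in requests) - min(r["send"] for r in requests)
-print(f"Avg time to first token (s): {mean(ttft):.4f}")
-print(f"Avg inter token latency (s): {mean(itl):.4f}")
-print(f"Avg request latency (s):     {mean(request_latency):.4f}")
-print(f"Output token throughput (per sec): {sum(output_sequence_length) / duration:.2f}")
-print(f"Request throughput (per sec):      {len(requests) / duration:.2f}")
-```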
-
-
-## Command Line Options
-
-##### `-h`
-##### `--help`
-
-Show the help message and exit.
-
-### Endpoint Options
-
-##### `-m <list>`
-##### `--model <list>`
-
-The names of the models to benchmark.
-A single model is recommended, unless you are
-[profiling multiple LoRA adapters](docs/lora.md). (default: `None`)
-
-##### `--model-selection-strategy {round_robin, random}`
-
-When multiple models are specified, this is how a specific model
-is assigned to a prompt. Round robin means that each model receives
-a request in order. Random means that assignment is uniformly random.
-(default: `round_robin`)
-
-##### `--backend {tensorrtllm,vllm}`
-
-When using the "triton" service-kind, this is the backend of the model. For the
-TRT-LLM backend, you currently must set `exclude_input_in_output` to true in the
-model config to not echo the input tokens in the output. (default: `tensorrtllm`)
-
-##### `--endpoint <str>`
-
-Set a custom endpoint that differs from the OpenAI defaults. (default: `None`)
-
-##### `--endpoint-type {chat,completions,embeddings,rankings}`
-
-The endpoint-type to send requests to on the server. This is only used with the
-`openai` service-kind. (default: `None`)
-
-##### `--service-kind {triton,openai}`
-
-The kind of service perf_analyzer will generate load for. In order to use
-`openai`, you must specify an API via `--endpoint-type`. (default: `triton`)
-
-##### `--streaming`
-
-An option to enable the use of the streaming API. (default: `False`)
-
-##### `-u <url>`
-##### `--url <url>`
-
-URL of the endpoint to target for benchmarking. (default: `None`)
-
-### Input Options
-
-##### `-b <int>`
-##### `--batch-size <int>`
-
-The batch size of the requests GenAI-Perf should send.
-This is currently only supported with the
-[embeddings](docs/embeddings.md) and
-[rankings](docs/rankings.md) endpoint types.
-(default: `1`)
-
-##### `--extra-inputs <str>`
-
-Provide additional inputs to include with every request. You can repeat this
-flag for multiple inputs. Inputs should be in an input_name:value format.
-Alternatively, a string representing a json formatted dict can be provided.
-(default: `None`)
-
-##### `--input-dataset {openorca,cnn_dailymail}`
-
-The HuggingFace dataset to use for prompts.
-(default: `openorca`)
-
-##### `--input-file <path>`
-
-The input file containing the prompts to use for profiling.
-Each line should be a JSON object with a 'text_input' field in JSONL format.
-Example: `{"text_input": "Your prompt here"}`
-
-##### `--num-prompts <int>`
-
-The number of unique prompts to generate as stimulus. (default: `100`)
-
-##### `--output-tokens-mean <int>`
-
-The mean number of tokens in each output. Ensure the `--tokenizer` value is set
-correctly. (default: `-1`)
-
-##### `--output-tokens-mean-deterministic`
-
-When using `--output-tokens-mean`, this flag can be set to improve precision by
-setting the minimum number of tokens equal to the requested number of tokens.
-This is currently supported with the Triton service-kind. Note that there is
-still some variability in the requested number of output tokens, but GenAI-Perf
-makes a best effort with your model to get the right number of output
-tokens. (default: `False`)
-
-##### `--output-tokens-stddev <int>`
-
-The standard deviation of the number of tokens in each output. This is only used
-when `--output-tokens-mean` is provided. (default: `0`)
-
-##### `--random-seed <int>`
-
-The seed used to generate random values.
(default: `0`)
-
-##### `--synthetic-input-tokens-mean <int>`
-
-The mean number of tokens in the generated prompts when using synthetic
-data. (default: `550`)
-
-##### `--synthetic-input-tokens-stddev <int>`
-
-The standard deviation of the number of tokens in the generated prompts when
-using synthetic data. (default: `0`)
-
-### Profiling Options
-
-##### `--concurrency <int>`
-
-The concurrency value to benchmark. (default: `None`)
-
-##### `--measurement-interval <int>`
-##### `-p <int>`
-
-The time interval used for each measurement in milliseconds. Perf Analyzer
-will sample the specified time interval and take measurements over the requests
-completed within that time interval. (default: `10000`)
-
-##### `--request-rate <float>`
-
-Sets the request rate for the load generated by Perf Analyzer. (default: `None`)
-
-##### `-s <float>`
-##### `--stability-percentage <float>`
-
-The allowed variation in latency measurements when determining if a result is
-stable. The measurement is considered stable if the ratio of max / min from
-the recent 3 measurements is within (stability percentage) in terms of both
-inferences per second and latency. (default: `999`)
-
-### Output Options
-
-##### `--artifact-dir`
-
-The directory to store all the (output) artifacts generated by GenAI-Perf and
-Perf Analyzer. (default: `artifacts`)
-
-##### `--generate-plots`
-
-An option to enable the generation of plots. (default: `False`)
-
-##### `--profile-export-file <path>`
-
-The path where the perf_analyzer profile export will be generated. By default,
-the profile export will be to `profile_export.json`. The genai-perf file will be
-exported to `<profile_export_file>_genai_perf.csv`. For example, if the profile
-export file is `profile_export.json`, the genai-perf file will be exported to
-`profile_export_genai_perf.csv`. (default: `profile_export.json`)
-
-### Other Options
-
-##### `--tokenizer <str>`
-
-The HuggingFace tokenizer to use to interpret token metrics from prompts and
-responses. (default: `hf-internal-testing/llama-tokenizer`)
-
-##### `-v`
-##### `--verbose`
-
-An option to enable verbose mode. (default: `False`)
-
-##### `--version`
-
-An option to print the version and exit.
-
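-Related to the Output Options above, the following sketch shows one way to
-locate and read the genai-perf CSV after a run that used
-`--profile-export-file my_profile_export.json`. It simply globs under the
-default artifact directory rather than assuming a specific layout:
-
-```python
-# read_results.py -- sketch: find and print the exported genai-perf CSV.
-import csv
-from pathlib import Path
-
-stem = Path("my_profile_export.json").stem  # "my_profile_export"
-matches = sorted(Path("artifacts").rglob(f"{stem}_genai_perf.csv"))
-
-if matches:
-    with open(matches[0], newline="") as f:
-        for row in csv.reader(f):
-            print(row)
-else:
-    print("No genai-perf CSV found under ./artifacts")
-```
-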
- - - -## Known Issues - -* GenAI-Perf can be slow to finish if a high request-rate is provided -* Token counts may not be exact diff --git a/src/c++/perf_analyzer/genai-perf/docs/assets/distribution_of_input_sequence_lengths_to_output_sequence_lengths.jpeg b/src/c++/perf_analyzer/genai-perf/docs/assets/distribution_of_input_sequence_lengths_to_output_sequence_lengths.jpeg deleted file mode 100644 index 1f9b2cba6..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/docs/assets/distribution_of_input_sequence_lengths_to_output_sequence_lengths.jpeg and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/docs/assets/request_latency.jpeg b/src/c++/perf_analyzer/genai-perf/docs/assets/request_latency.jpeg deleted file mode 100644 index d681195ff..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/docs/assets/request_latency.jpeg and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/docs/assets/time_to_first_token.jpeg b/src/c++/perf_analyzer/genai-perf/docs/assets/time_to_first_token.jpeg deleted file mode 100644 index 99ca06ee0..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/docs/assets/time_to_first_token.jpeg and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/docs/assets/time_to_first_token_vs_input_sequence_lengths.jpeg b/src/c++/perf_analyzer/genai-perf/docs/assets/time_to_first_token_vs_input_sequence_lengths.jpeg deleted file mode 100644 index 1b81ef532..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/docs/assets/time_to_first_token_vs_input_sequence_lengths.jpeg and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/docs/assets/token-to-token_latency_vs_output_token_position.jpeg b/src/c++/perf_analyzer/genai-perf/docs/assets/token-to-token_latency_vs_output_token_position.jpeg deleted file mode 100644 index 4a179ef8d..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/docs/assets/token-to-token_latency_vs_output_token_position.jpeg and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/docs/compare.md b/src/c++/perf_analyzer/genai-perf/docs/compare.md deleted file mode 100644 index 5d1a36413..000000000 --- a/src/c++/perf_analyzer/genai-perf/docs/compare.md +++ /dev/null @@ -1,251 +0,0 @@ - - -# GenAI-Perf Compare Subcommand - -There are two approaches for the users to use the `compare` subcommand to create -plots across multiple runs. First is to directly pass the profile export files -with `--files` option - -## Running initially with `--files` option - -If the user does not have a YAML configuration file, -they can run the `compare` subcommand with the `--files` option to generate a -set of default plots as well as a pre-filled YAML config file for the plots. - -```bash -genai-perf compare --files profile1.json profile2.json profile3.json -``` - -This will generate the default plots and compare across the three runs. 
-GenAI-Perf will also generate an initial YAML configuration file `config.yaml` -that is pre-filled with plot configurations as following: - -```yaml -plot1: - title: Time to First Token - x_metric: '' - y_metric: time_to_first_tokens - x_label: Time to First Token (ms) - y_label: '' - width: 1200 - height: 700 - type: box - paths: - - profile1.json - - profile2.json - - profile3.json - output: compare -plot2: - title: Request Latency - x_metric: '' - y_metric: request_latencies - x_label: Request Latency (ms) - y_label: '' - width: 1200 - height: 700 - type: box - paths: - - profile1.json - - profile2.json - - profile3.json - output: compare -plot3: - title: Distribution of Input Sequence Lengths to Output Sequence Lengths - x_metric: input_sequence_lengths - y_metric: output_sequence_lengths - x_label: Input Sequence Length - y_label: Output Sequence Length - width: 1200 - height: 450 - type: heatmap - paths: - - profile1.json - - profile2.json - - profile3.json - output: compare -plot4: - title: Time to First Token vs Input Sequence Lengths - x_metric: input_sequence_lengths - y_metric: time_to_first_tokens - x_label: Input Sequence Length - y_label: Time to First Token (ms) - width: 1200 - height: 700 - type: scatter - paths: - - profile1.json - - profile2.json - - profile3.json - output: compare -plot5: - title: Token-to-Token Latency vs Output Token Position - x_metric: token_positions - y_metric: inter_token_latencies - x_label: Output Token Position - y_label: Token-to-Token Latency (ms) - width: 1200 - height: 700 - type: scatter - paths: - - profile1.json - - profile2.json - - profile3.json - output: compare -``` - -Once the user has the YAML configuration file, -they can repeat the process of editing the config file and running with -`--config` option to re-generate the plots iteratively. - -```bash -# edit -vi config.yaml - -# re-generate the plots -genai-perf compare --config config.yaml -``` - -## Running directly with `--config` option - -If the user would like to create a custom plot (other than the default ones provided), -they can build their own YAML configuration file that contains the information -about the plots they would like to generate. -For instance, if the user would like to see how the inter token latencies change -by the number of output tokens, which is not part of the default plots, -they could add the following YAML block to the file: - -```yaml -plot1: - title: Inter Token Latency vs Output Tokens - x_metric: num_output_tokens - y_metric: inter_token_latencies - x_label: Num Output Tokens - y_label: Avg ITL (ms) - width: 1200 - height: 450 - type: scatter - paths: - - - - - output: compare -``` - -After adding the lines, the user can run the following command to generate the -plots specified in the configuration file (in this case, `config.yaml`): - -```bash -genai-perf compare --config config.yaml -``` - -The user can check the generated plots under the output directory: -``` -compare/ -├── inter_token_latency_vs_output_tokens.jpeg -└── ... -``` - -## YAML Schema - -Here are more details about the YAML configuration file and its stricture. -The general YAML schema for the plot configuration looks as following: - -```yaml -plot1: - title: [str] - x_metric: [str] - y_metric: [str] - x_label: [str] - y_label: [str] - width: [int] - height: [int] - type: [scatter,box,heatmap] - paths: - - [str] - - ... 
- output: [str] - -plot2: - title: [str] - x_metric: [str] - y_metric: [str] - x_label: [str] - y_label: [str] - width: [int] - height: [int] - type: [scatter,box,heatmap] - paths: - - [str] - - ... - output: [str] - -# add more plots -``` - -The user can add as many plots they would like to generate by adding the plot -blocks in the configuration file (they have a key pattern of `plot<#>`, -but that is not required and the user can set it to any arbitrary string). -For each plot block, the user can specify the following configurations: -- `title`: The title of the plot. -- `x_metric`: The name of the metric to be used on the x-axis. -- `y_metric`: The name of the metric to be used on the y-axis. -- `x_label`: The x-axis label (or description) -- `y_label`: The y-axis label (or description) -- `width`: The width of the entire plot -- `height`: The height of the entire plot -- `type`: The type of the plot. It must be one of the three: `scatter`, `box`, -or `heatmap`. -- `paths`: List of paths to the profile export files to compare. -- `output`: The path to the output directory to store all the plots and YAML -configuration file. - -> [!Note] -> User *MUST* provide at least one valid path to the profile export file. - - - -## Example Plots - -Here are the list of sample plots that gets created by default from running the -`compare` subcommand: - -### Distribution of Input Sequence Lengths to Output Sequence Lengths - - -### Request Latency Analysis - - -### Time to First Token Analysis - - -### Time to First Token vs. Input Sequence Lengths - - -### Token-to-Token Latency vs. Output Token Position - - diff --git a/src/c++/perf_analyzer/genai-perf/docs/embeddings.md b/src/c++/perf_analyzer/genai-perf/docs/embeddings.md deleted file mode 100644 index e508f9eff..000000000 --- a/src/c++/perf_analyzer/genai-perf/docs/embeddings.md +++ /dev/null @@ -1,93 +0,0 @@ - - -# Profile Embeddings Models with GenAI-Perf - -GenAI-Perf allows you to profile embedding models running on an -[OpenAI Embeddings API](https://platform.openai.com/docs/api-reference/embeddings)-compatible server. - -## Create a Sample Embeddings Input File - -To create a sample embeddings input file, use the following command: - -```bash -echo '{"text": "What was the first car ever driven?"} -{"text": "Who served as the 5th President of the United States of America?"} -{"text": "Is the Sydney Opera House located in Australia?"} -{"text": "In what state did they film Shrek 2?"}' > embeddings.jsonl -``` - -This will generate a file named embeddings.jsonl with the following content: -```jsonl -{"text": "What was the first car ever driven?"} -{"text": "Who served as the 5th President of the United States of America?"} -{"text": "Is the Sydney Opera House located in Australia?"} -{"text": "In what state did they film Shrek 2?"} -``` - -## Start an OpenAI Embeddings-Compatible Server -To start an OpenAI embeddings-compatible server, run the following command: -```bash -docker run -it --net=host --rm --gpus=all vllm/vllm-openai:latest --model intfloat/e5-mistral-7b-instruct --dtype float16 --max-model-len 1024 -``` - -## Run GenAI-Perf -To profile embeddings models using GenAI-Perf, use the following command: - -```bash -genai-perf profile \ - -m intfloat/e5-mistral-7b-instruct \ - --service-kind openai \ - --endpoint-type embeddings \ - --batch-size 2 \ - --input-file embeddings.jsonl -``` - -This will use default values for optional arguments. 
You can also pass in -additional arguments with the `--extra-inputs` [flag](../README.md#input-options). -For example, you could use this command: - -```bash -genai-perf profile \ - -m intfloat/e5-mistral-7b-instruct \ - --service-kind openai \ - --endpoint-type embeddings \ - --extra-inputs user:sample_user -``` - -Example output: - -``` - Embeddings Metrics -┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━┩ -│ Request latency (ms) │ 42.21 │ 28.18 │ 318.61 │ 56.50 │ 49.21 │ 43.07 │ -└──────────────────────┴───────┴───────┴────────┴───────┴───────┴───────┘ -Request throughput (per sec): 23.63 -``` diff --git a/src/c++/perf_analyzer/genai-perf/docs/files.md b/src/c++/perf_analyzer/genai-perf/docs/files.md deleted file mode 100644 index 6ebdf69fa..000000000 --- a/src/c++/perf_analyzer/genai-perf/docs/files.md +++ /dev/null @@ -1,129 +0,0 @@ - - -# Generated File Structures - -## Overview - -This document serves as a guide to understanding the structure and contents of -the files generated by GenAi-Perf. - -## Directory Structure - -After running GenAi-Perf, your file tree should contain the following: - -``` -genai-perf/ -├── artifacts/ -│ ├── data/ -│ └── images/ -``` - -## File Types -Within the artifacts and docs directories, several file types are generated, -including .gzip, .csv, .json, .html, and .jpeg. Below is a detailed -explanation of each file and its purpose. - -### Artifacts Directory - -#### Data Subdirectory - -The data subdirectory contains the raw and processed performance data files. - -##### GZIP Files - -- all_data.gzip: Aggregated performance data from all collected metrics. -- input_sequence_lengths_vs_output_sequence_lengths.gzip: This contains data on -the input sequence lengths versus the output sequence lengths for each request. -- request_latency.gzip: This contains the latency for each request. -- time_to_first_token.gzip: This contains the time to first token for each request. -- token_to_token_vs_output_position.gzip: This contains the time from one token -generation to the next versus the position of the output token for each token. -- ttft_vs_input_sequence_lengths.gzip: This contains the time to first token -versus the input sequence length for each request. - -##### JSON Files - -- llm_inputs.json: This contains the input prompts provided to the LLM during testing. -- profile_export.json: This is provided by Perf Analyzer and contains the timestamps -for each event in the lifecycle of each request. This is low-level data used to calculate -metrics by GenAi-Perf. - -##### CSV File - -- profile_export_genai_perf.csv: A CSV of the output tables printed -in the GenAi-Perf output. These may have more detail than the printed tables. - -#### Images Subdirectory - -The images subdirectory contains visual representations of the performance -data. All images are in both HTML and JPEG formats. - -##### HTML and JPEG Files -- input_sequence_lengths_vs_output_sequence_lengths: A heat map showing the -relationship between input and generated tokens. -- request_latency: A box plot showing request latency. -- time_to_first_token: A box plot showing time to first token. -- token_to_token_vs_output_position: A scatterplot showing token-to-token -time versus output token position. -- ttft_vs_input_sequence_lengths: A scatterplot showing token-to-token time -versus the input sequence lengths. 
-
-## Usage Instructions
-
-To use the generated files, navigate to the artifacts/data directory. Then,
-the next steps depend on the file format you wish to work with.
-
-### GZIP Files
-
-The GZIP files contain Parquet files with calculated data, which can be read
-with Pandas in Python. For example, you can create a dataframe with these files:
-
-```python
-import pandas
-# path_to_file points at one of the .gzip files described above.
-df = pandas.read_parquet(path_to_file)
-```
-
-You can then use Pandas to work with the data.
-
-```python
-print(df.head())      # See the first few rows of the data.
-print(df.describe())  # Get summary statistics for the data.
-```
-
-### CSV and JSON Files
-Open .csv and .json files with spreadsheet or JSON parsing tools for structured
-data analysis. These can also be read via a text editor, like Vim.
-
-### HTML Files
-
-View .html visualizations in a web browser for interactive data exploration.
-
-### JPEG Files
-
-Use image software to open .jpeg images for static visual representations.
diff --git a/src/c++/perf_analyzer/genai-perf/docs/lora.md b/src/c++/perf_analyzer/genai-perf/docs/lora.md
deleted file mode 100644
index d30867eda..000000000
--- a/src/c++/perf_analyzer/genai-perf/docs/lora.md
+++ /dev/null
@@ -1,53 +0,0 @@
-
-# Profile Multiple LoRA Adapters
-GenAI-Perf allows you to profile multiple LoRA adapters on top of a base model.
-
-## Select LoRA Adapters
-To do this, list multiple adapters after the model name option `-m`:
-
-```bash
-genai-perf -m lora_adapter1 lora_adapter2 lora_adapter3
-```
-
-## Choose a Strategy for Selecting Models
-When profiling with multiple models, you can specify how the models should be
-assigned to prompts using the `--model-selection-strategy` option:
-
-```bash
-genai-perf profile \
-    -m lora_adapter1 lora_adapter2 lora_adapter3 \
-    --model-selection-strategy round_robin
-```
-
-This setup will cycle through the lora_adapter1, lora_adapter2, and
-lora_adapter3 models in a round-robin manner for each prompt.
-
-For more details on additional options and configurations, refer to the
-[Command Line Options section](../README.md#command-line-options) in the README.
\ No newline at end of file
diff --git a/src/c++/perf_analyzer/genai-perf/docs/multi_modal.md b/src/c++/perf_analyzer/genai-perf/docs/multi_modal.md
deleted file mode 100644
index bb9f33c60..000000000
--- a/src/c++/perf_analyzer/genai-perf/docs/multi_modal.md
+++ /dev/null
@@ -1,122 +0,0 @@
-
-# Profile Vision-Language Models with GenAI-Perf
-
-GenAI-Perf allows you to profile Vision-Language Models (VLM) running on an
-[OpenAI Chat Completions API](https://platform.openai.com/docs/guides/chat-completions)-compatible server
-by sending [multi-modal content](https://platform.openai.com/docs/guides/vision) to the server.
-Currently, you can send multi-modal content with GenAI-Perf using the following two approaches:
-1. The synthetic data generation approach, where GenAI-Perf generates the multi-modal data for you.
-2. The Bring Your Own Data (BYOD) approach, where you provide GenAI-Perf with the data to send.
-
-Before we dive into the two approaches,
-you can start an OpenAI-API-compatible server with a VLM model using the following command:
-
-```bash
-docker run --runtime nvidia --gpus all \
-    -p 8000:8000 --ipc=host \
-    vllm/vllm-openai:latest \
-    --model llava-hf/llava-v1.6-mistral-7b-hf --dtype float16
-```
-
-
-## Approach 1: Synthetic Multi-Modal Data Generation
-
-GenAI-Perf can generate synthetic multi-modal data such as texts or images using
-the parameters provided by the user through the CLI.
- -```bash -genai-perf profile \ - -m llava-hf/llava-v1.6-mistral-7b-hf \ - --service-kind openai \ - --endpoint-type vision \ - --image-width-mean 512 \ - --image-width-stddev 30 \ - --image-height-mean 512 \ - --image-height-stddev 30 \ - --image-format png \ - --synthetic-input-tokens-mean 100 \ - --synthetic-input-tokens-stddev 0 \ - --streaming -``` - -> [!Note] -> Under the hood, GenAI-Perf generates synthetic images using a few source images -> under the `llm_inputs/source_images` directory. -> If you would like to add/remove/edit the source images, -> you can do so by directly editing the source images under the directory. -> GenAI-Perf will pickup the images under the directory automatically when -> generating the synthetic images. - - -## Approach 2: Bring Your Own Data (BYOD) - -Instead of letting GenAI-Perf create the synthetic data, -you can also provide GenAI-Perf with your own data using -[`--input-file`](../README.md#--input-file-path) CLI option. -The file needs to be in JSONL format and should contain both the prompt and -the filepath to the image to send. - -For instance, an example of input file would look something as following: -```bash -// input.jsonl -{"text_input": "What is in this image?", "image": "path/to/image1.png"} -{"text_input": "What is the color of the dog?", "image": "path/to/image2.jpeg"} -{"text_input": "Describe the scene in the picture.", "image": "path/to/image3.png"} -... -``` - -After you create the file, you can run GenAI-Perf using the following command: - -```bash -genai-perf profile \ - -m llava-hf/llava-v1.6-mistral-7b-hf \ - --service-kind openai \ - --endpoint-type vision \ - --input-file input.jsonl \ - --streaming -``` - -Running GenAI-Perf using either approach will give you an example output that -looks like below: - -```bash - LLM Metrics -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━┩ -│ Time to first token (ms) │ 321.05 │ 291.30 │ 537.07 │ 497.88 │ 318.46 │ 317.35 │ -│ Inter token latency (ms) │ 12.28 │ 11.44 │ 12.88 │ 12.87 │ 12.81 │ 12.53 │ -│ Request latency (ms) │ 1,866.23 │ 1,044.70 │ 2,832.22 │ 2,779.63 │ 2,534.64 │ 2,054.03 │ -│ Output sequence length │ 126.68 │ 59.00 │ 204.00 │ 200.58 │ 177.80 │ 147.50 │ -│ Input sequence length │ 100.00 │ 100.00 │ 100.00 │ 100.00 │ 100.00 │ 100.00 │ -└──────────────────────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘ -Output token throughput (per sec): 67.40 -Request throughput (per sec): 0.53 -``` diff --git a/src/c++/perf_analyzer/genai-perf/docs/rankings.md b/src/c++/perf_analyzer/genai-perf/docs/rankings.md deleted file mode 100644 index a316ef857..000000000 --- a/src/c++/perf_analyzer/genai-perf/docs/rankings.md +++ /dev/null @@ -1,100 +0,0 @@ - - -# Profile Ranking Models with GenAI-Perf - - -GenAI-Perf allows you to profile ranking models compatible with Hugging Face's -[Text Embeddings Inference's re-ranker API](https://huggingface.co/docs/text-embeddings-inference/en/quick_tour#re-rankers). 
- -## Create a Sample Rankings Input Directory - -To create a sample rankings input directory, follow these steps: - -Create a directory called rankings_jsonl: -```bash -mkdir rankings_jsonl -``` - -Inside this directory, create a JSONL file named queries.jsonl with queries data: - -```bash -echo '{"text": "What was the first car ever driven?"} -{"text": "Who served as the 5th President of the United States of America?"} -{"text": "Is the Sydney Opera House located in Australia?"} -{"text": "In what state did they film Shrek 2?"}' > rankings_jsonl/queries.jsonl -``` - -Create another JSONL file named passages.jsonl with passages data: - -```bash -echo '{"text": "Eric Anderson (born January 18, 1968) is an American sociologist and sexologist."} -{"text": "Kevin Loader is a British film and television producer."} -{"text": "Francisco Antonio Zea Juan Francisco Antonio Hilari was a Colombian journalist, botanist, diplomat, politician, and statesman who served as the 1st Vice President of Colombia."} -{"text": "Daddys Home 2 Principal photography on the film began in Massachusetts in March 2017 and it was released in the United States by Paramount Pictures on November 10, 2017. Although the film received unfavorable reviews, it has grossed over $180 million worldwide on a $69 million budget."}' > rankings_jsonl/passages.jsonl -``` - -## Start a Hugging Face Re-Ranker-Compatible Server -To start a Hugging Face re-ranker-compatible server, run the following commands: - -```bash -model=BAAI/bge-reranker-large -revision=refs/pr/4 -volume=$PWD/data - -docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:1.3 --model-id $model --revision $revision -``` - -## Run GenAI-Perf -To profile ranking models using GenAI-Perf, use the following command: - -```bash -genai-perf profile \ - -m BAAI/bge-reranker-large \ - --service-kind openai \ - --endpoint-type rankings \ - --endpoint rerank \ - --input-file rankings_jsonl/ \ - -u localhost:8080 \ - --extra-inputs rankings:tei \ - --batch-size 2 -``` - -This command specifies the use of Hugging Face's ranking API with `--endpoint rerank` and `--extra-inputs rankings:tei`. - -Example output: - -``` - Rankings Metrics -┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━┳━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━╇━━━━━━┩ -│ Request latency (ms) │ 5.48 │ 2.50 │ 23.91 │ 10.27 │ 8.34 │ 6.07 │ -└──────────────────────┴──────┴──────┴───────┴───────┴──────┴──────┘ -Request throughput (per sec): 180.11 -``` diff --git a/src/c++/perf_analyzer/genai-perf/docs/tutorial.md b/src/c++/perf_analyzer/genai-perf/docs/tutorial.md deleted file mode 100644 index 15cc53efe..000000000 --- a/src/c++/perf_analyzer/genai-perf/docs/tutorial.md +++ /dev/null @@ -1,301 +0,0 @@ - - -# Tutorials - -- [Profile GPT2 running on Triton + TensorRT-LLM](#tensorrt-llm) -- [Profile GPT2 running on Triton + vLLM](#triton-vllm) -- [Profile GPT2 running on OpenAI Chat Completions API-Compatible Server](#openai-chat) -- [Profile GPT2 running on OpenAI Completions API-Compatible Server](#openai-completions) - ---- - -## Profile GPT2 running on Triton + TensorRT-LLM - -### Run GPT2 on Triton Inference Server using TensorRT-LLM - -
-See instructions - -Run Triton Inference Server with TensorRT-LLM backend container: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - -docker run -it --net=host --gpus=all --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tritonserver:${RELEASE}-trtllm-python-py3 - -# Install Triton CLI (~5 min): -pip install "git+https://github.com/triton-inference-server/triton_cli@0.0.8" - -# Download model: -triton import -m gpt2 --backend tensorrtllm - -# Run server: -triton start -``` - -
- -### Run GenAI-Perf - -Run GenAI-Perf from Triton Inference Server SDK container: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - -docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# Run GenAI-Perf in the container: -genai-perf profile \ - -m gpt2 \ - --service-kind triton \ - --backend tensorrtllm \ - --num-prompts 100 \ - --random-seed 123 \ - --synthetic-input-tokens-mean 200 \ - --synthetic-input-tokens-stddev 0 \ - --streaming \ - --output-tokens-mean 100 \ - --output-tokens-stddev 0 \ - --output-tokens-mean-deterministic \ - --tokenizer hf-internal-testing/llama-tokenizer \ - --concurrency 1 \ - --measurement-interval 4000 \ - --profile-export-file my_profile_export.json \ - --url localhost:8001 -``` - -Example output: - -``` - LLM Metrics -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ Time to first token (ns) │ 13,266,974 │ 11,818,732 │ 18,351,779 │ 16,513,479 │ 13,741,986 │ 13,544,376 │ -│ Inter token latency (ns) │ 2,069,766 │ 42,023 │ 15,307,799 │ 3,256,375 │ 3,020,580 │ 2,090,930 │ -│ Request latency (ns) │ 223,532,625 │ 219,123,330 │ 241,004,192 │ 238,198,306 │ 229,676,183 │ 224,715,918 │ -│ Output sequence length │ 104 │ 100 │ 129 │ 128 │ 109 │ 105 │ -│ Input sequence length │ 199 │ 199 │ 199 │ 199 │ 199 │ 199 │ -└──────────────────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┘ -Output token throughput (per sec): 460.42 -Request throughput (per sec): 4.44 -``` - -## Profile GPT2 running on Triton + vLLM - -### Run GPT2 on Triton Inference Server using vLLM - -
-See instructions - -Run Triton Inference Server with vLLM backend container: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - - -docker run -it --net=host --gpus=1 --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tritonserver:${RELEASE}-vllm-python-py3 - -# Install Triton CLI (~5 min): -pip install "git+https://github.com/triton-inference-server/triton_cli@0.0.8" - -# Download model: -triton import -m gpt2 --backend vllm - -# Run server: -triton start -``` - -
- -### Run GenAI-Perf - -Run GenAI-Perf from Triton Inference Server SDK container: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - -docker run -it --net=host --gpus=1 nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# Run GenAI-Perf in the container: -genai-perf profile \ - -m gpt2 \ - --service-kind triton \ - --backend vllm \ - --num-prompts 100 \ - --random-seed 123 \ - --synthetic-input-tokens-mean 200 \ - --synthetic-input-tokens-stddev 0 \ - --streaming \ - --output-tokens-mean 100 \ - --output-tokens-stddev 0 \ - --output-tokens-mean-deterministic \ - --tokenizer hf-internal-testing/llama-tokenizer \ - --concurrency 1 \ - --measurement-interval 4000 \ - --profile-export-file my_profile_export.json \ - --url localhost:8001 -``` - -Example output: - -``` - LLM Metrics -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ Time to first token (ns) │ 15,786,560 │ 11,437,189 │ 49,550,549 │ 40,129,652 │ 21,248,091 │ 17,824,695 │ -│ Inter token latency (ns) │ 3,543,380 │ 591,898 │ 10,013,690 │ 6,152,260 │ 5,039,278 │ 4,060,982 │ -│ Request latency (ns) │ 388,415,721 │ 312,552,612 │ 528,229,817 │ 518,189,390 │ 484,281,365 │ 459,417,637 │ -│ Output sequence length │ 113 │ 105 │ 123 │ 122 │ 119 │ 115 │ -│ Input sequence length │ 199 │ 199 │ 199 │ 199 │ 199 │ 199 │ -└──────────────────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┘ -Output token throughput (per sec): 290.24 -Request throughput (per sec): 2.57 -``` - -## Profile GPT2 running on OpenAI Chat API-Compatible Server - -### Run GPT2 on [OpenAI Chat Completions API](https://platform.openai.com/docs/api-reference/chat)-compatible server - -
-See instructions - -Run the vLLM inference server: - -```bash -docker run -it --net=host --gpus=all vllm/vllm-openai:latest --model gpt2 --dtype float16 --max-model-len 1024 -``` - -
- -### Run GenAI-Perf - -Run GenAI-Perf from Triton Inference Server SDK container: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - -docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# Run GenAI-Perf in the container: -genai-perf profile \ - -m gpt2 \ - --service-kind openai \ - --endpoint v1/chat/completions \ - --endpoint-type chat \ - --num-prompts 100 \ - --random-seed 123 \ - --synthetic-input-tokens-mean 200 \ - --synthetic-input-tokens-stddev 0 \ - --streaming \ - --output-tokens-mean 100 \ - --output-tokens-stddev 0 \ - --tokenizer hf-internal-testing/llama-tokenizer \ - --concurrency 1 \ - --measurement-interval 4000 \ - --profile-export-file my_profile_export.json \ - --url localhost:8000 -``` - -Example output: - -``` - LLM Metrics -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ Time to first token (ns) │ 13,546,815 │ 9,821,658 │ 48,317,756 │ 34,361,913 │ 16,541,625 │ 14,612,026 │ -│ Inter token latency (ns) │ 2,560,813 │ 457,703 │ 6,507,334 │ 3,754,617 │ 3,059,158 │ 2,953,540 │ -│ Request latency (ns) │ 283,597,027 │ 240,098,890 │ 361,730,568 │ 349,164,037 │ 323,279,761 │ 306,507,562 │ -│ Output sequence length │ 114 │ 103 │ 142 │ 136 │ 122 │ 119 │ -│ Input sequence length │ 199 │ 199 │ 199 │ 199 │ 199 │ 199 │ -└──────────────────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┘ -Output token throughput (per sec): 401.62 -Request throughput (per sec): 3.52 -``` - -## Profile GPT2 running on OpenAI Completions API-Compatible Server - -### Running GPT2 on [OpenAI Completions API](https://platform.openai.com/docs/api-reference/completions)-compatible server - -
-See instructions - -Run the vLLM inference server: - -```bash -docker run -it --net=host --gpus=all vllm/vllm-openai:latest --model gpt2 --dtype float16 --max-model-len 1024 -``` - -
- -### Run GenAI-Perf - -Run GenAI-Perf from Triton Inference Server SDK container: - -```bash -export RELEASE="yy.mm" # e.g. export RELEASE="24.06" - -docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - - -# Run GenAI-Perf in the container: -genai-perf profile \ - -m gpt2 \ - --service-kind openai \ - --endpoint v1/completions \ - --endpoint-type completions \ - --num-prompts 100 \ - --random-seed 123 \ - --synthetic-input-tokens-mean 200 \ - --synthetic-input-tokens-stddev 0 \ - --output-tokens-mean 100 \ - --output-tokens-stddev 0 \ - --tokenizer hf-internal-testing/llama-tokenizer \ - --concurrency 1 \ - --measurement-interval 4000 \ - --profile-export-file my_profile_export.json \ - --url localhost:8000 -``` - -Example output: - -``` - LLM Metrics -┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ Request latency (ns) │ 296,990,497 │ 43,312,449 │ 332,788,242 │ 327,475,292 │ 317,392,767 │ 310,343,333 │ -│ Output sequence length │ 109 │ 11 │ 158 │ 142 │ 118 │ 113 │ -│ Input sequence length │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ -└────────────────────────┴─────────────┴────────────┴─────────────┴─────────────┴─────────────┴─────────────┘ -Output token throughput (per sec): 366.78 -Request throughput (per sec): 3.37 -``` diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/.gitignore b/src/c++/perf_analyzer/genai-perf/genai_perf/.gitignore deleted file mode 100644 index 973a71df2..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.json -*.cache diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/__init__.py b/src/c++/perf_analyzer/genai-perf/genai_perf/__init__.py deleted file mode 100644 index d656fe629..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -__version__ = "0.0.5dev" diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/constants.py b/src/c++/perf_analyzer/genai-perf/genai_perf/constants.py deleted file mode 100644 index b951524bf..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/constants.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -DEFAULT_HTTP_URL = "localhost:8000" -DEFAULT_GRPC_URL = "localhost:8001" - - -OPEN_ORCA = "openorca" -CNN_DAILY_MAIL = "cnn_dailymail" -DEFAULT_INPUT_DATA_JSON = "llm_inputs.json" - - -DEFAULT_ARTIFACT_DIR = "artifacts" -DEFAULT_COMPARE_DIR = "compare" -DEFAULT_PARQUET_FILE = "all_data" diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/exceptions.py b/src/c++/perf_analyzer/genai-perf/genai_perf/exceptions.py deleted file mode 100644 index ff4170af0..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/exceptions.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -class GenAIPerfException(Exception): - """ - A custom exception specific to the genai-perf - """ - - pass diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/console_exporter.py b/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/console_exporter.py deleted file mode 100644 index 460fe5976..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/console_exporter.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -from genai_perf.export_data.exporter_config import ExporterConfig -from rich.console import Console -from rich.table import Table - - -class ConsoleExporter: - """ - A class to export the statistics and arg values to the console. 
- """ - - STAT_COLUMN_KEYS = ["avg", "min", "max", "p99", "p90", "p75"] - - def __init__(self, config: ExporterConfig): - self._stats = config.stats - self._metrics = config.metrics - self._args = config.args - - def _get_title(self): - if self._args.endpoint_type == "embeddings": - return "Embeddings Metrics" - elif self._args.endpoint_type == "rankings": - return "Rankings Metrics" - else: - return "LLM Metrics" - - def export(self) -> None: - table = Table(title=self._get_title()) - - table.add_column("Statistic", justify="right", style="cyan", no_wrap=True) - for stat in self.STAT_COLUMN_KEYS: - table.add_column(stat, justify="right", style="green") - - # Request metrics table - self._construct_table(table) - - console = Console() - console.print(table) - - # System metrics are printed after the table - for metric in self._metrics.system_metrics: - line = metric.name.replace("_", " ").capitalize() - value = self._stats[metric.name]["avg"] - line += f" ({metric.unit}): {value:.2f}" - print(line) - - def _construct_table(self, table: Table) -> None: - for metric in self._metrics.request_metrics: - if self._should_skip(metric.name): - continue - - metric_str = metric.name.replace("_", " ").capitalize() - metric_str += f" ({metric.unit})" if metric.unit != "tokens" else "" - row_values = [metric_str] - for stat in self.STAT_COLUMN_KEYS: - value = self._stats[metric.name][stat] - row_values.append(f"{value:,.2f}") - - table.add_row(*row_values) - - # (TMA-1976) Refactor this method as the csv exporter shares identical method. - def _should_skip(self, metric_name: str) -> bool: - if self._args.endpoint_type == "embeddings": - return False # skip nothing - - # TODO (TMA-1712): need to decide if we need this metric. Remove - # from statistics display for now. - # TODO (TMA-1678): output_token_throughput_per_request is treated - # separately since the current code treats all throughput metrics to - # be displayed outside of the statistics table. - if metric_name == "output_token_throughput_per_request": - return True - - # When non-streaming, skip ITL and TTFT - streaming_metrics = [ - "inter_token_latency", - "time_to_first_token", - ] - if not self._args.streaming and metric_name in streaming_metrics: - return True - return False diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/csv_exporter.py b/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/csv_exporter.py deleted file mode 100644 index efbb9b754..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/csv_exporter.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -import csv - -import genai_perf.logging as logging -from genai_perf.export_data.exporter_config import ExporterConfig - -DEFAULT_OUTPUT_DATA_CSV = "profile_export_genai_perf.csv" - -logger = logging.getLogger(__name__) - - -class CsvExporter: - """ - A class to export the statistics and arg values in a csv format. - """ - - REQUEST_METRICS_HEADER = [ - "Metric", - "avg", - "min", - "max", - "p99", - "p95", - "p90", - "p75", - "p50", - "p25", - ] - - SYSTEM_METRICS_HEADER = [ - "Metric", - "Value", - ] - - def __init__(self, config: ExporterConfig): - self._stats = config.stats - self._metrics = config.metrics - self._output_dir = config.artifact_dir - self._args = config.args - - def export(self) -> None: - csv_filename = self._output_dir / DEFAULT_OUTPUT_DATA_CSV - logger.info(f"Generating {csv_filename}") - - with open(csv_filename, mode="w", newline="") as csvfile: - csv_writer = csv.writer(csvfile) - self._write_request_metrics(csv_writer) - csv_writer.writerow([]) - self._write_system_metrics(csv_writer) - - def _write_request_metrics(self, csv_writer) -> None: - csv_writer.writerow(self.REQUEST_METRICS_HEADER) - for metric in self._metrics.request_metrics: - if self._should_skip(metric.name): - continue - - metric_str = metric.name.replace("_", " ").title() - metric_str += f" ({metric.unit})" if metric.unit != "tokens" else "" - row_values = [metric_str] - for stat in self.REQUEST_METRICS_HEADER[1:]: - value = self._stats[metric.name][stat] - row_values.append(f"{value:,.2f}") - - csv_writer.writerow(row_values) - - def _write_system_metrics(self, csv_writer) -> None: - csv_writer.writerow(self.SYSTEM_METRICS_HEADER) - for metric in self._metrics.system_metrics: - metric_str = metric.name.replace("_", " ").title() - metric_str += f" ({metric.unit})" - value = self._stats[metric.name]["avg"] - csv_writer.writerow([metric_str, f"{value:.2f}"]) - - def _should_skip(self, metric_name: str) -> bool: - if self._args.endpoint_type == "embeddings": - return False # skip nothing - - # TODO (TMA-1712): need to decide if we need this metric. Remove - # from statistics display for now. - # TODO (TMA-1678): output_token_throughput_per_request is treated - # separately since the current code treats all throughput metrics to - # be displayed outside of the statistics table. 
- if metric_name == "output_token_throughput_per_request": - return True - - # When non-streaming, skip ITL and TTFT - streaming_metrics = [ - "inter_token_latency", - "time_to_first_token", - ] - if not self._args.streaming and metric_name in streaming_metrics: - return True - return False diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/data_exporter_factory.py b/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/data_exporter_factory.py deleted file mode 100644 index ac226bdf5..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/data_exporter_factory.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from typing import List - -from genai_perf.export_data.console_exporter import ConsoleExporter -from genai_perf.export_data.csv_exporter import CsvExporter -from genai_perf.export_data.exporter_config import ExporterConfig -from genai_perf.export_data.json_exporter import JsonExporter - -DataExporterList = [ConsoleExporter, JsonExporter, CsvExporter] - - -class DataExporterFactory: - def create_data_exporters(self, config: ExporterConfig) -> List: - data_exporters = [] - for exporter in DataExporterList: - data_exporters.append(exporter(config)) - return data_exporters diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/data_exporter_interface.py b/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/data_exporter_interface.py deleted file mode 100644 index 56bde9a53..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/data_exporter_interface.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -from typing import Protocol - - -class DataExporterInterface(Protocol): - def export(self): - pass diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/exporter_config.py b/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/exporter_config.py deleted file mode 100644 index 0d9c7cd0b..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/exporter_config.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
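Aside for readers of this removal: the DataExporterInterface and DataExporterFactory deleted above form a small structural-typing plugin pattern — any class with an export() method satisfies the Protocol, and the factory simply instantiates every registered exporter with the same config and calls them in turn. A minimal sketch of that pattern follows; it is illustrative only, and the exporter names and payload shape are invented for the example rather than taken from this repository.

```python
# Minimal sketch of the Protocol + factory pattern, under assumed names.
from typing import List, Protocol


class DataExporter(Protocol):
    def export(self) -> None: ...


class StdoutExporter:
    def __init__(self, payload: dict) -> None:
        self._payload = payload

    def export(self) -> None:
        print(self._payload)


class CountExporter:
    def __init__(self, payload: dict) -> None:
        self._payload = payload

    def export(self) -> None:
        print(f"{len(self._payload)} metrics")


def create_exporters(payload: dict) -> List[DataExporter]:
    # Mirrors the factory's role: one instance per registered exporter class,
    # all treated uniformly because they satisfy the same Protocol.
    return [cls(payload) for cls in (StdoutExporter, CountExporter)]


for exporter in create_exporters({"request_latency": 42.0}):
    exporter.export()
```

Because the Protocol is structural, neither exporter needs to inherit from it; adding a new output format is just a matter of writing another class with an export() method and registering it with the factory.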
- - -from genai_perf.metrics import Metrics - - -class ExporterConfig: - def __init__(self): - self._stats = None - self._metrics = None - self._args = None - self._extra_inputs = None - self._artifact_dir = None - - @property - def stats(self): - return self._stats - - @stats.setter - def stats(self, stats_value): - self._stats = stats_value - - @property - def metrics(self): - return self._metrics - - @metrics.setter - def metrics(self, metrics: Metrics): - self._metrics = metrics - - @property - def args(self): - return self._args - - @args.setter - def args(self, args_value): - self._args = args_value - - @property - def extra_inputs(self): - return self._extra_inputs - - @extra_inputs.setter - def extra_inputs(self, extra_inputs_value): - self._extra_inputs = extra_inputs_value - - @property - def artifact_dir(self): - return self._artifact_dir - - @artifact_dir.setter - def artifact_dir(self, artifact_dir_value): - self._artifact_dir = artifact_dir_value diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/json_exporter.py b/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/json_exporter.py deleted file mode 100644 index 2ec24fae1..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/json_exporter.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -import json -from enum import Enum -from typing import Dict - -import genai_perf.logging as logging -from genai_perf.export_data.exporter_config import ExporterConfig - -DEFAULT_OUTPUT_DATA_JSON = "profile_export_genai_perf.json" - -logger = logging.getLogger(__name__) - - -class JsonExporter: - """ - A class to export the statistics and arg values in a json format. 
- """ - - def __init__(self, config: ExporterConfig): - self._stats: Dict = config.stats - self._args = dict(vars(config.args)) - self._extra_inputs = config.extra_inputs - self._output_dir = config.artifact_dir - self._stats_and_args: Dict = {} - self._prepare_args_for_export() - self._merge_stats_and_args() - - def export(self) -> None: - filename = self._output_dir / DEFAULT_OUTPUT_DATA_JSON - logger.info(f"Generating {filename}") - with open(str(filename), "w") as f: - f.write(json.dumps(self._stats_and_args, indent=2)) - - def _prepare_args_for_export(self) -> None: - self._args.pop("func", None) - self._args.pop("output_format", None) - self._args.pop("input_file", None) - self._args["profile_export_file"] = str(self._args["profile_export_file"]) - self._args["artifact_dir"] = str(self._args["artifact_dir"]) - for k, v in self._args.items(): - if isinstance(v, Enum): - self._args[k] = v.name.lower() - self._add_extra_inputs_to_args() - - def _add_extra_inputs_to_args(self) -> None: - del self._args["extra_inputs"] - self._args.update({"extra_inputs": self._extra_inputs}) - - def _merge_stats_and_args(self) -> None: - self._stats_and_args = dict(self._stats) - self._stats_and_args.update({"input_config": self._args}) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/output_reporter.py b/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/output_reporter.py deleted file mode 100644 index ec8123b95..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/export_data/output_reporter.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -from argparse import Namespace - -from genai_perf.export_data.data_exporter_factory import DataExporterFactory -from genai_perf.export_data.exporter_config import ExporterConfig -from genai_perf.metrics import Statistics -from genai_perf.parser import get_extra_inputs_as_dict - - -class OutputReporter: - """ - A class to orchestrate output generation. 
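Aside for readers of this removal: the JsonExporter deleted above merges the computed statistics with the parsed CLI arguments under an "input_config" key and writes the result as pretty-printed JSON named profile_export_genai_perf.json. The sketch below shows only that merge-and-dump step; it is illustrative, and export_json plus the simplified argument handling are assumptions rather than code from this repository.

```python
# Minimal sketch of the merge-and-dump step, assuming plain dicts for the
# stats and the (already sanitized) argument values.
import json
from pathlib import Path


def export_json(stats: dict, args: dict, artifact_dir: Path) -> Path:
    merged = dict(stats)
    merged["input_config"] = args  # arguments recorded under "input_config"
    out = artifact_dir / "profile_export_genai_perf.json"
    out.write_text(json.dumps(merged, indent=2))
    return out


# Example: export_json({"request_latency": {"avg": 42.0}},
#                      {"model": "my_model"}, Path("."))
```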
- """ - - def __init__(self, stats: Statistics, args: Namespace): - self.args = args - self.stats = stats - self.stats.scale_data() - - def report_output(self) -> None: - factory = DataExporterFactory() - exporter_config = self._create_exporter_config() - data_exporters = factory.create_data_exporters(exporter_config) - - for exporter in data_exporters: - exporter.export() - - def _create_exporter_config(self) -> ExporterConfig: - config = ExporterConfig() - config.stats = self.stats.stats_dict - config.metrics = self.stats.metrics - config.args = self.args - config.artifact_dir = self.args.artifact_dir - config.extra_inputs = get_extra_inputs_as_dict(self.args) - return config diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/__init__.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/__init__.py deleted file mode 100644 index c6959fce1..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/farewell.txt b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/farewell.txt deleted file mode 100644 index cfbe41a7c..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/farewell.txt +++ /dev/null @@ -1,104 +0,0 @@ -The period for a new election of a citizen to -administer the executive government of the United -States being not far distant, and the time actually -arrived when your thoughts must be employed in -designating the person who is to be clothed with that -important trust, it appears to me proper, especially as -it may conduce to a more distinct expression of the -public voice, that I should now apprise you of the -resolution I have formed, to decline being considered -among the number of those out of whom a choice is to be made. -I beg you, at the same time, to do me the justice to -be assured that this resolution has not been taken -without a strict regard to all the considerations -appertaining to the relation which binds a dutiful -citizen to his country—and that, in withdrawing the -tender of service which silence in my situation might -imply, I am influenced by no diminution of zeal for -your future interest, no deficiency of grateful respect -for your past kindness; but am supported by a full -conviction that the step is compatible with both. -The acceptance of, and continuance hitherto in, the -office to which your suffrages have twice called me, -have been a uniform sacrifice of inclination to the -opinion of duty and to a deference for what appeared -to be your desire. I constantly hoped that it would -have been much earlier in my power, consistently with -motives which I was not at liberty to disregard, to -return to that retirement from which I had been -reluctantly drawn. 
The strength of my inclination to -do this, previous to the last election, had even led to -the preparation of an address to declare it to you; but -mature reflection on the then perplexed and critical -posture of our affairs with foreign nations, and the -unanimous advice of persons entitled to my -confidence, impelled me to abandon the idea. -I rejoice that the state of your concerns, external as -well as internal, no longer renders the pursuit of -inclination incompatible with the sentiment of duty or -propriety, and am persuaded whatever partiality may -be retained for my services, that in the present -circumstances of our country, you will not disapprove -my determination to retire. -The impressions with which I first undertook the -arduous trust were explained on the proper occasion. -In the discharge of this trust, I will only say that I -have, with good intentions, contributed towards the -organization and administration of the government, -the best exertions of which a very fallible judgment -was capable. Not unconscious in the outset of the -inferiority of my qualifications, experience in my -own eyes, perhaps still more in the eyes of others, -has strengthened the motives to diffidence of myself; -and every day the increasing weight of years -admonishes me more and more that the shade of -retirement is as necessary to me as it will be -welcome. Satisfied that if any circumstances have -given peculiar value to my services, they were -temporary, I have the consolation to believe, that -while choice and prudence invite me to quit the -political scene, patriotism does not forbid it. -In looking forward to the moment which is -intended to terminate the career of my public life, my -feelings do not permit me to suspend the deep -acknowledgment of that debt of gratitude which I -owe to my beloved country for the many honors it has -conferred upon me; still more for the steadfast -confidence with which it has supported me; and for -the opportunities I have thence enjoyed of manifesting -my inviolable attachment, by services faithful and -persevering, though in usefulness unequal to my zeal. -If benefits have resulted to our country from these -services, let it always be remembered to your praise, -and as an instructive example in our annals that -under circumstances in which the passions agitated in -every direction were liable to mislead, amidst -appearances sometimes dubious, vicissitudes of -fortune often discouraging, in situations in which not -unfrequently want of success has countenanced the -spirit of criticism, the constancy of your support was -the essential prop of the efforts, and a guarantee of -the plans by which they were effected. Profoundly -penetrated with this idea, I shall carry it with me to -my grave, as a strong incitement to unceasing vows -that Heaven may continue to you the choicest tokens -of its beneficence; that your Union and brotherly -affection may be perpetual; that the free constitution, -which is the work of your hands, may be sacredly -maintained; that its administration in every -department may be stamped with wisdom and virtue; -that, in fine, the happiness of the people of these -states, under the auspices of liberty, may be made -complete by so careful a preservation and so prudent -a use of this blessing as will acquire to them the glory -of recommending it to the applause, the affection, -and adoption of every nation which is yet a stranger to it. -Here, perhaps, I ought to stop. 
But a solicitude for -your welfare, which cannot end but with my life, and -the apprehension of danger, natural to that -solicitude, urge me on an occasion like the present, -to offer to your solemn contemplation, and to -recommend to your frequent review, some sentiments -which are the result of much reflection, of no -inconsiderable observation, and which appear to me -all important to the permanency of your felicity as a \ No newline at end of file diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py deleted file mode 100644 index 057c33562..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py +++ /dev/null @@ -1,1585 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import random -from copy import deepcopy -from enum import Enum, auto -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, cast - -import requests -from genai_perf import utils -from genai_perf.constants import CNN_DAILY_MAIL, DEFAULT_INPUT_DATA_JSON, OPEN_ORCA -from genai_perf.exceptions import GenAIPerfException -from genai_perf.llm_inputs.synthetic_image_generator import ( - ImageFormat, - SyntheticImageGenerator, -) -from genai_perf.llm_inputs.synthetic_prompt_generator import SyntheticPromptGenerator -from genai_perf.tokenizer import DEFAULT_TOKENIZER, Tokenizer, get_tokenizer -from genai_perf.utils import load_json_str -from PIL import Image -from requests import Response - - -class ModelSelectionStrategy(Enum): - ROUND_ROBIN = auto() - RANDOM = auto() - - -class PromptSource(Enum): - SYNTHETIC = auto() - DATASET = auto() - FILE = auto() - - -class OutputFormat(Enum): - OPENAI_CHAT_COMPLETIONS = auto() - OPENAI_COMPLETIONS = auto() - OPENAI_EMBEDDINGS = auto() - OPENAI_VISION = auto() - RANKINGS = auto() - TENSORRTLLM = auto() - VLLM = auto() - - def to_lowercase(self): - return self.name.lower() - - -class LlmInputs: - """ - A library of methods that control the generation of LLM Inputs - """ - - OPEN_ORCA_URL = "https://datasets-server.huggingface.co/rows?dataset=Open-Orca%2FOpenOrca&config=default&split=train" - CNN_DAILYMAIL_URL = "https://datasets-server.huggingface.co/rows?dataset=cnn_dailymail&config=1.0.0&split=train" - - DEFAULT_STARTING_INDEX = 0 - MINIMUM_STARTING_INDEX = 0 - - DEFAULT_LENGTH = 100 - MINIMUM_LENGTH = 1 - - DEFAULT_TENSORRTLLM_MAX_TOKENS = 256 - - DEFAULT_BATCH_SIZE = 1 - DEFAULT_RANDOM_SEED = 0 - DEFAULT_PROMPT_TOKENS_MEAN = 550 - DEFAULT_PROMPT_TOKENS_STDDEV = 0 - DEFAULT_OUTPUT_TOKENS_MEAN = -1 - DEFAULT_OUTPUT_TOKENS_STDDEV = 0 - DEFAULT_NUM_PROMPTS = 100 - - DEFAULT_IMAGE_WIDTH_MEAN = 100 - DEFAULT_IMAGE_WIDTH_STDDEV = 0 - DEFAULT_IMAGE_HEIGHT_MEAN = 100 - DEFAULT_IMAGE_HEIGHT_STDDEV = 0 - - EMPTY_JSON_IN_VLLM_PA_FORMAT: Dict = {"data": []} - EMPTY_JSON_IN_TENSORRTLLM_PA_FORMAT: Dict = {"data": []} - EMPTY_JSON_IN_OPENAI_PA_FORMAT: Dict = 
{"data": []} - - dataset_url_map = {OPEN_ORCA: OPEN_ORCA_URL, CNN_DAILY_MAIL: CNN_DAILYMAIL_URL} - - @classmethod - def create_llm_inputs( - cls, - input_type: PromptSource, - output_format: OutputFormat, - dataset_name: str = "", - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - input_filename: Optional[Path] = Path(""), - starting_index: int = DEFAULT_STARTING_INDEX, - length: int = DEFAULT_LENGTH, - output_tokens_mean: int = DEFAULT_OUTPUT_TOKENS_MEAN, - output_tokens_stddev: int = DEFAULT_OUTPUT_TOKENS_STDDEV, - output_tokens_deterministic: bool = False, - prompt_tokens_mean: int = DEFAULT_PROMPT_TOKENS_MEAN, - prompt_tokens_stddev: int = DEFAULT_PROMPT_TOKENS_STDDEV, - image_width_mean: int = DEFAULT_IMAGE_WIDTH_MEAN, - image_width_stddev: int = DEFAULT_IMAGE_WIDTH_STDDEV, - image_height_mean: int = DEFAULT_IMAGE_HEIGHT_MEAN, - image_height_stddev: int = DEFAULT_IMAGE_HEIGHT_STDDEV, - image_format: ImageFormat = ImageFormat.PNG, - random_seed: int = DEFAULT_RANDOM_SEED, - num_of_output_prompts: int = DEFAULT_NUM_PROMPTS, - add_model_name: bool = False, - add_stream: bool = False, - tokenizer: Tokenizer = get_tokenizer(DEFAULT_TOKENIZER), - extra_inputs: Optional[Dict] = None, - batch_size: int = 1, - output_dir: Path = Path(""), - ) -> Dict: - """ - Given an input type, input format, and output type. Output a string of LLM Inputs - (in a JSON dictionary) to a file - - Required Parameters - ------------------- - input_type: - Specify how the input is received - output_format: - Specify the output format - - Optional Parameters - ------------------- - dataset_name: - The name of the dataset - model_name: - The model name - starting_index: - Offset from within the list to start gathering inputs - length: - Number of entries to gather - add_model_name: - If true, adds a model name field to each payload - add_stream: - If true, adds a steam field to each payload - extra_inputs: - If provided, append these inputs to every request - output_tokens_mean: - The mean length of the output to generate. If not using fixed output lengths, this should be set to -1. - output_tokens_stddev: - The standard deviation of the length of the output to generate. This is only used if output_tokens_mean is provided. - output_tokens_deterministic: - If true, the output tokens will set the minimum and maximum tokens to be equivalent. - image_width_mean: - The mean width of images when generating synthetic image data. - image_width_stddev: - The standard deviation of width of images when generating synthetic image data. - image_height_mean: - The mean height of images when generating synthetic image data. - image_height_stddev: - The standard deviation of height of images when generating synthetic image data. - image_format: - The compression format of the images. 
- batch_size: - The number of inputs per request (currently only used for the embeddings and rankings endpoints) - - Required Synthetic Prompt Generation Parameters - ----------------------------------------------- - tokenizer: - The tokenizer to use when generating synthetic prompts - - Optional Synthetic Prompt Generation Parameters - ----------------------------------------------- - prompt_tokens_mean: - The mean length of the prompt to generate - prompt_tokens_stddev: - The standard deviation of the length of the prompt to generate - num_of_output_prompts: - The number of synthetic output prompts to generate - random_seed: - Seed used to generate random values - """ - - cls._check_for_valid_args( - input_type, dataset_name, starting_index, length, tokenizer - ) - - random.seed(random_seed) - - generic_dataset_json = cls.get_generic_dataset_json( - input_type, - output_format, - dataset_name, - starting_index, - length, - tokenizer, - prompt_tokens_mean, - prompt_tokens_stddev, - num_of_output_prompts, - image_width_mean, - image_width_stddev, - image_height_mean, - image_height_stddev, - image_format, - batch_size, - input_filename, - ) - - if extra_inputs is None: - extra_inputs = {} - - json_in_pa_format = cls._convert_generic_json_to_output_format( - output_format, - generic_dataset_json, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - cls._write_json_to_file(json_in_pa_format, output_dir) - - return json_in_pa_format - - @classmethod - def get_generic_dataset_json( - cls, - input_type: PromptSource, - output_format: OutputFormat, - dataset_name: str, - starting_index: int, - length: int, - tokenizer: Tokenizer, - prompt_tokens_mean: int, - prompt_tokens_stddev: int, - num_of_output_prompts: int, - image_width_mean: int, - image_width_stddev: int, - image_height_mean: int, - image_height_stddev: int, - image_format: ImageFormat, - batch_size: int, - input_filename: Optional[Path], - ) -> Dict: - """ - Retrieve and convert the dataset based on the input type. - - Parameters - ---------- - input_type: - Specify how the input is received - output_format: - Specify the output format - dataset_name: - The name of the dataset - starting_index: - Offset from within the list to start gathering inputs - length: - Number of entries to gather - tokenizer: - The tokenizer to use when generating synthetic prompts - prompt_tokens_mean: - The mean length of the prompt to generate - prompt_tokens_stddev: - The standard deviation of the length of the prompt to generate - num_of_output_prompts: - The number of synthetic output prompts to generate - image_width_mean: - The mean width of images when generating synthetic image data. - image_width_stddev: - The standard deviation of width of images when generating synthetic image data. - image_height_mean: - The mean height of images when generating synthetic image data. - image_height_stddev: - The standard deviation of height of images when generating synthetic image data. - image_format: - The compression format of the images. - batch_size: - The number of inputs per request (currently only used for the embeddings and rankings endpoints) - input_filename: - The path to the input file containing the prompts in JSONL format. 
- Returns - ------- - Dict: - The generic dataset JSON - """ - - if output_format == OutputFormat.OPENAI_EMBEDDINGS: - if input_type != PromptSource.FILE: - raise GenAIPerfException( - f"{OutputFormat.OPENAI_EMBEDDINGS.to_lowercase()} only supports a file as input." - ) - input_filename = cast(Path, input_filename) - input_file_dataset = cls._get_input_dataset_from_embeddings_file( - input_filename, - batch_size, - num_of_output_prompts, - ) - generic_dataset_json = ( - cls._convert_input_synthetic_or_file_dataset_to_generic_json( - input_file_dataset - ) - ) - elif output_format == OutputFormat.RANKINGS: - if input_type != PromptSource.FILE: - raise GenAIPerfException( - f"{OutputFormat.RANKINGS.to_lowercase()} only supports a directory as input." - ) - queries_filename = cast(Path, input_filename) / "queries.jsonl" - passages_filename = cast(Path, input_filename) / "passages.jsonl" - input_file_dataset = cls._get_input_dataset_from_rankings_files( - queries_filename, passages_filename, batch_size, num_of_output_prompts - ) - - generic_dataset_json = ( - cls._convert_input_synthetic_or_file_dataset_to_generic_json( - input_file_dataset - ) - ) - else: - if input_type == PromptSource.DATASET: - # (TMA-1990) support VLM input from public dataset - if output_format == OutputFormat.OPENAI_VISION: - raise GenAIPerfException( - f"{OutputFormat.OPENAI_VISION.to_lowercase()} currently " - "does not support dataset as input." - ) - dataset = cls._get_input_dataset_from_url( - dataset_name, starting_index, length - ) - generic_dataset_json = cls._convert_input_url_dataset_to_generic_json( - dataset - ) - elif input_type == PromptSource.SYNTHETIC: - synthetic_dataset = cls._get_input_dataset_from_synthetic( - tokenizer, - prompt_tokens_mean, - prompt_tokens_stddev, - num_of_output_prompts, - image_width_mean, - image_width_stddev, - image_height_mean, - image_height_stddev, - image_format, - output_format, - ) - generic_dataset_json = ( - cls._convert_input_synthetic_or_file_dataset_to_generic_json( - synthetic_dataset - ) - ) - elif input_type == PromptSource.FILE: - input_filename = cast(Path, input_filename) - input_file_dataset = cls._get_input_dataset_from_file(input_filename) - input_file_dataset = cls._encode_images_in_input_dataset( - input_file_dataset - ) - generic_dataset_json = ( - cls._convert_input_synthetic_or_file_dataset_to_generic_json( - input_file_dataset - ) - ) - else: - raise GenAIPerfException("Input source is not recognized.") - - # When the generic_dataset_json contains multi-modal data (e.g. 
images), - # convert the format of the content to OpenAI multi-modal format: - # see https://platform.openai.com/docs/guides/vision - if output_format == OutputFormat.OPENAI_VISION: - generic_dataset_json = cls._convert_to_openai_multi_modal_content( - generic_dataset_json - ) - - return generic_dataset_json - - @classmethod - def _get_input_dataset_from_embeddings_file( - cls, input_filename: Path, batch_size: int, num_prompts: int - ) -> Dict[str, Any]: - with open(input_filename, "r") as file: - file_content = [load_json_str(line) for line in file] - - texts = [item["text"] for item in file_content] - - if batch_size > len(texts): - raise ValueError( - "Batch size cannot be larger than the number of available texts" - ) - - dataset_json: Dict[str, Any] = {} - dataset_json["features"] = [{"name": "input"}] - dataset_json["rows"] = [] - - for _ in range(num_prompts): - sampled_texts = random.sample(texts, batch_size) - dataset_json["rows"].append({"row": {"payload": {"input": sampled_texts}}}) - - return dataset_json - - @classmethod - def _get_input_dataset_from_rankings_files( - cls, - queries_filename: Path, - passages_filename: Path, - batch_size: int, - num_prompts: int, - ) -> Dict[str, Any]: - - with open(queries_filename, "r") as file: - queries_content = [load_json_str(line) for line in file] - queries_texts = [item for item in queries_content] - - with open(passages_filename, "r") as file: - passages_content = [load_json_str(line) for line in file] - passages_texts = [item for item in passages_content] - - if batch_size > len(passages_texts): - raise ValueError( - "Batch size cannot be larger than the number of available passages" - ) - - dataset_json: Dict[str, Any] = {} - dataset_json["features"] = [{"name": "input"}] - dataset_json["rows"] = [] - - for _ in range(num_prompts): - sampled_texts = random.sample(passages_texts, batch_size) - query_sample = random.choice(queries_texts) - entry_dict: Dict = {} - entry_dict["query"] = query_sample - entry_dict["passages"] = sampled_texts - dataset_json["rows"].append({"row": {"payload": entry_dict}}) - return dataset_json - - @classmethod - def _check_for_valid_args( - cls, - input_type: PromptSource, - dataset_name: str, - starting_index: int, - length: int, - tokenizer: Tokenizer, - ) -> None: - try: - cls._check_for_dataset_name_if_input_type_is_url(input_type, dataset_name) - cls._check_for_tokenzier_if_input_type_is_synthetic(input_type, tokenizer) - cls._check_for_valid_starting_index(starting_index) - cls._check_for_valid_length(length) - - except Exception as e: - raise GenAIPerfException(e) - - @classmethod - def _get_input_dataset_from_url( - cls, dataset_name: str, starting_index: int, length: int - ) -> Response: - url = cls._resolve_url(dataset_name) - configured_url = cls._create_configured_url(url, starting_index, length) - dataset = cls._download_dataset(configured_url) - - return dataset - - @classmethod - def _get_input_dataset_from_synthetic( - cls, - tokenizer: Tokenizer, - prompt_tokens_mean: int, - prompt_tokens_stddev: int, - num_of_output_prompts: int, - image_width_mean: int, - image_width_stddev: int, - image_height_mean: int, - image_height_stddev: int, - image_format: ImageFormat, - output_format: OutputFormat, - ) -> Dict[str, Any]: - dataset_json: Dict[str, Any] = {} - dataset_json["features"] = [{"name": "text_input"}] - dataset_json["rows"] = [] - for _ in range(num_of_output_prompts): - row: Dict["str", Any] = {"row": {}} - synthetic_prompt = cls._create_synthetic_prompt( - tokenizer, - 
prompt_tokens_mean, - prompt_tokens_stddev, - ) - row["row"]["text_input"] = synthetic_prompt - - if output_format == OutputFormat.OPENAI_VISION: - synthetic_image = cls._create_synthetic_image( - image_width_mean=image_width_mean, - image_width_stddev=image_width_stddev, - image_height_mean=image_height_mean, - image_height_stddev=image_height_stddev, - image_format=image_format, - ) - row["row"]["image"] = synthetic_image - - dataset_json["rows"].append(row) - - return dataset_json - - @classmethod - def _resolve_url(cls, dataset_name: str) -> str: - if dataset_name in cls.dataset_url_map: - return cls.dataset_url_map[dataset_name] - else: - raise GenAIPerfException( - f"{dataset_name} does not have a corresponding URL in the dataset_url_map." - ) - - @classmethod - def _create_configured_url(cls, url: str, starting_index: int, length: int) -> str: - starting_index_str = str(starting_index) - length_str = str(length) - configured_url = url + f"&offset={starting_index_str}&length={length_str}" - - return configured_url - - @classmethod - def _download_dataset(cls, configured_url: str) -> Response: - dataset = cls._query_server(configured_url) - - return dataset - - @classmethod - def _convert_input_url_dataset_to_generic_json(cls, dataset: Response) -> Dict: - dataset_json = dataset.json() - try: - cls._check_for_error_in_json_of_dataset(dataset_json) - except Exception as e: - raise GenAIPerfException(e) - - generic_dataset_json = cls._convert_dataset_to_generic_input_json(dataset_json) - - return generic_dataset_json - - @classmethod - def _convert_input_synthetic_or_file_dataset_to_generic_json( - cls, dataset: Dict - ) -> Dict[str, List[Dict]]: - generic_dataset_json = cls._convert_dataset_to_generic_input_json(dataset) - - return generic_dataset_json - - @classmethod - def _convert_dataset_to_generic_input_json( - cls, dataset_json: Dict - ) -> Dict[str, List[Dict]]: - generic_input_json = cls._add_features_to_generic_json({}, dataset_json) - generic_input_json = cls._add_rows_to_generic_json( - generic_input_json, dataset_json - ) - - return generic_input_json - - @classmethod - def _add_features_to_generic_json( - cls, generic_input_json: Dict, dataset_json: Dict - ) -> Dict: - if "features" in dataset_json.keys(): - generic_input_json["features"] = [] - for feature in dataset_json["features"]: - generic_input_json["features"].append(feature["name"]) - - return generic_input_json - - @classmethod - def _add_rows_to_generic_json( - cls, generic_input_json: Dict, dataset_json: Dict - ) -> Dict[str, List[Dict]]: - generic_input_json["rows"] = [] - for row in dataset_json["rows"]: - generic_input_json["rows"].append(row["row"]) - - return generic_input_json - - @classmethod - def _get_input_dataset_from_file(cls, input_filename: Path) -> Dict: - """ - Reads the input prompts and images from a JSONL file and converts them - into the required dataset format. - - Parameters - ---------- - input_filename : Path - The path to the input file containing the prompts and/or images in - JSONL format. - - Returns - ------- - Dict - The dataset in the required format with the prompts and/or images - read from the file. 
- """ - cls.verify_file(input_filename) - prompts, images = cls._get_prompts_from_input_file(input_filename) - dataset_json: Dict[str, Any] = {} - dataset_json["features"] = [{"name": "text_input"}] - dataset_json["rows"] = [] - for prompt, image in zip(prompts, images): - content = {"text_input": prompt} - content.update({"image": image} if image else {}) - dataset_json["rows"].append({"row": content}) - - return dataset_json - - @classmethod - def _get_prompts_from_input_file( - cls, input_filename: Path - ) -> Tuple[List[str], List[str]]: - """ - Reads the input prompts from a JSONL file and returns a list of prompts. - - Parameters - ---------- - input_filename : Path - The path to the input file containing the prompts in JSONL format. - - Returns - ------- - Tuple[List[str], List[str]] - A list of prompts and images read from the file. - """ - prompts = [] - images = [] - with open(input_filename, mode="r", newline=None) as file: - for line in file: - if line.strip(): - prompts.append(load_json_str(line).get("text_input", "").strip()) - images.append(load_json_str(line).get("image", "").strip()) - return prompts, images - - @classmethod - def verify_file(cls, input_filename: Path) -> None: - if not input_filename.exists(): - raise FileNotFoundError(f"The file '{input_filename}' does not exist.") - - @classmethod - def _convert_to_openai_multi_modal_content( - cls, generic_dataset_json: Dict[str, List[Dict]] - ) -> Dict[str, List[Dict]]: - """ - Converts to multi-modal content format of OpenAI Chat Completions API. - """ - for row in generic_dataset_json["rows"]: - if row["image"]: - row["text_input"] = [ - { - "type": "text", - "text": row["text_input"], - }, - { - "type": "image_url", - "image_url": {"url": row["image"]}, - }, - ] - - return generic_dataset_json - - @classmethod - def _encode_images_in_input_dataset(cls, input_file_dataset: Dict) -> Dict: - for row in input_file_dataset["rows"]: - filename = row["row"].get("image") - if filename: - img = Image.open(filename) - if img.format.lower() not in utils.get_enum_names(ImageFormat): - raise GenAIPerfException( - f"Unsupported image format '{img.format}' of " - f"the image '{filename}'." 
- ) - - img_base64 = utils.encode_image(img, img.format) - payload = f"data:image/{img.format.lower()};base64,{img_base64}" - row["row"]["image"] = payload - - return input_file_dataset - - @classmethod - def _convert_generic_json_to_output_format( - cls, - output_format: OutputFormat, - generic_dataset: Dict, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - if ( - output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS - or output_format == OutputFormat.OPENAI_VISION - ): - output_json = cls._convert_generic_json_to_openai_chat_completions_format( - generic_dataset, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - elif output_format == OutputFormat.OPENAI_COMPLETIONS: - output_json = cls._convert_generic_json_to_openai_completions_format( - generic_dataset, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - elif output_format == OutputFormat.OPENAI_EMBEDDINGS: - output_json = cls._convert_generic_json_to_openai_embeddings_format( - generic_dataset, - extra_inputs, - model_name, - model_selection_strategy, - ) - elif output_format == OutputFormat.RANKINGS: - output_json = cls._convert_generic_json_to_rankings_format( - generic_dataset, - extra_inputs, - model_name, - model_selection_strategy, - ) - elif output_format == OutputFormat.VLLM: - output_json = cls._convert_generic_json_to_vllm_format( - generic_dataset, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - elif output_format == OutputFormat.TENSORRTLLM: - output_json = cls._convert_generic_json_to_trtllm_format( - generic_dataset, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - else: - raise GenAIPerfException( - f"Output format {output_format} is not currently supported" - ) - - return output_json - - @classmethod - def _convert_generic_json_to_openai_chat_completions_format( - cls, - dataset_json: Dict, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - # TODO (TMA-1757): Implement a way to select a role for `text_input` - ( - system_role_headers, - user_role_headers, - _, - ) = cls._determine_json_feature_roles(dataset_json) - pa_json = cls._populate_openai_chat_completions_output_json( - dataset_json, - system_role_headers, - user_role_headers, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - - return pa_json - - @classmethod - def _convert_generic_json_to_openai_completions_format( - cls, - dataset_json: Dict, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - 
output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - ( - system_role_headers, - user_role_headers, - text_input_headers, - ) = cls._determine_json_feature_roles(dataset_json) - pa_json = cls._populate_openai_completions_output_json( - dataset_json, - system_role_headers, - user_role_headers, - text_input_headers, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - - return pa_json - - @classmethod - def _convert_generic_json_to_openai_embeddings_format( - cls, - generic_dataset: Dict, - extra_inputs: Dict, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict[str, Any]: - pa_json: Dict[str, Any] = {"data": []} - - for index, entry in enumerate(generic_dataset["rows"]): - iter_model_name = cls._select_model_name( - model_name, index, model_selection_strategy - ) - payload = entry.get("payload", {}) - input_values = payload.get("input") - - if input_values is None: - raise ValueError("Missing required fields 'input' in dataset entry") - if not isinstance(input_values, list): - raise ValueError( - f"Required field 'input' must be a list (actual: {type(input_values)})" - ) - - payload = { - "input": input_values, - "model": iter_model_name, - } - - for key, value in extra_inputs.items(): - payload[key] = value - - pa_json["data"].append({"payload": [payload]}) - - return pa_json - - @classmethod - def contains_rankings_tei(cls, extra_inputs: Optional[Dict]) -> bool: - """ - Check if user specified that they are using the Hugging Face - Text Embeddings Interface for ranking models - """ - if extra_inputs and extra_inputs.get("rankings") == "tei": - return True - return False - - @classmethod - def _convert_generic_json_to_rankings_format( - cls, - generic_dataset: Dict, - extra_inputs: Dict, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict[str, Any]: - pa_json: Dict[str, Any] = {"data": []} - use_tei_format = cls.contains_rankings_tei(extra_inputs) - - for index, entry in enumerate(generic_dataset["rows"]): - iter_model_name = cls._select_model_name( - model_name, index, model_selection_strategy - ) - payload = entry.get("payload", {}) - query_values = payload.get("query") - - if use_tei_format: - passage_values = payload.get("passages", []) - passage_values = [item.get("text", "") for item in passage_values] - else: - passage_values = payload.get("passages") - - if query_values is None: - raise ValueError("Missing required fields 'query' in dataset entry") - if passage_values is None: - raise ValueError( - f"Missing required fields '{'texts' if use_tei_format else 'passages'}' in dataset entry" - ) - if not isinstance(passage_values, list): - raise ValueError( - f"Required field '{'texts' if use_tei_format else 'passages'}' must be a list (actual: {type(passage_values)})" - ) - - if use_tei_format: - payload = {"query": query_values["text"], "texts": passage_values} - else: - payload = { - "query": query_values, - "passages": passage_values, - "model": iter_model_name, - } - - for key, value in extra_inputs.items(): - if not (key == "rankings" and value == "tei"): - payload[key] = value - - pa_json["data"].append({"payload": [payload]}) - - return pa_json - - @classmethod - def _convert_generic_json_to_vllm_format( - 
cls, - dataset_json: Dict, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - ( - system_role_headers, - user_role_headers, - text_input_headers, - ) = cls._determine_json_feature_roles(dataset_json) - - pa_json = cls._populate_vllm_output_json( - dataset_json, - system_role_headers, - user_role_headers, - text_input_headers, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - - return pa_json - - @classmethod - def _convert_generic_json_to_trtllm_format( - cls, - dataset_json: Dict, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - ( - system_role_headers, - user_role_headers, - text_input_headers, - ) = cls._determine_json_feature_roles(dataset_json) - - pa_json = cls._populate_trtllm_output_json( - dataset_json, - system_role_headers, - user_role_headers, - text_input_headers, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - model_name, - model_selection_strategy, - ) - - return pa_json - - @classmethod - def _write_json_to_file(cls, json_in_pa_format: Dict, output_dir: Path) -> None: - filename = output_dir / DEFAULT_INPUT_DATA_JSON - with open(str(filename), "w") as f: - f.write(json.dumps(json_in_pa_format, indent=2)) - - @classmethod - def _determine_json_feature_roles( - cls, dataset_json: Dict - ) -> Tuple[List[str], List[str], List[str]]: - SYSTEM_ROLE_LIST = ["system_prompt"] - USER_ROLE_LIST = ["question", "article"] - TEXT_INPUT_LIST = ["text_input"] - - system_role_headers: List[str] = [] - user_role_headers: List[str] = [] - text_input_headers: List[str] = [] - - if "features" in dataset_json.keys(): - # TODO (TPA-53) remove enumerate if index isnt useful - for index, feature in enumerate(dataset_json["features"]): - if feature in SYSTEM_ROLE_LIST: - system_role_headers.append(feature) - if feature in USER_ROLE_LIST: - user_role_headers.append(feature) - if feature in TEXT_INPUT_LIST: - user_role_headers.append(feature) - - assert ( - system_role_headers is not None - or user_role_headers is not None - or text_input_headers is not None - ) - - return system_role_headers, user_role_headers, text_input_headers - - @classmethod - def _select_model_name(cls, model_name, index, model_selection_strategy): - if model_selection_strategy == ModelSelectionStrategy.ROUND_ROBIN: - return model_name[index % len(model_name)] - elif model_selection_strategy == ModelSelectionStrategy.RANDOM: - return random.choice(model_name) - else: - raise GenAIPerfException( - f"Model selection strategy '{model_selection_strategy}' is unsupported" - ) - - @classmethod - def _populate_openai_chat_completions_output_json( - cls, - dataset_json: Dict, - system_role_headers: List[str], - user_role_headers: List[str], - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy 
= ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - pa_json = cls._create_empty_openai_pa_json() - - for index, entry in enumerate(dataset_json["rows"]): - iter_model_name = cls._select_model_name( - model_name, index, model_selection_strategy - ) - pa_json["data"].append({"payload": []}) - pa_json["data"][index]["payload"].append({"messages": []}) - - for header, content in entry.items(): - new_message = cls._create_new_openai_chat_completions_message( - header, system_role_headers, user_role_headers, content - ) - - pa_json = cls._add_new_message_to_json(pa_json, index, new_message) - - pa_json = cls._add_optional_tags_to_openai_json( - pa_json, - index, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - iter_model_name, - ) - - return pa_json - - @classmethod - def _populate_openai_completions_output_json( - cls, - dataset_json: Dict, - system_role_headers: List[str], - user_role_headers: List[str], - text_input_headers: List[str], - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - pa_json = cls._create_empty_openai_pa_json() - - for index, entry in enumerate(dataset_json["rows"]): - iter_model_name = cls._select_model_name( - model_name, index, model_selection_strategy - ) - pa_json["data"].append({"payload": []}) - pa_json["data"][index]["payload"].append({"prompt": ""}) - - for header, content in entry.items(): - new_prompt = cls._create_new_prompt( - header, - system_role_headers, - user_role_headers, - text_input_headers, - content, - ) - - pa_json = cls._add_new_prompt_to_json(pa_json, index, new_prompt) - - pa_json = cls._add_optional_tags_to_openai_json( - pa_json, - index, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - iter_model_name, - ) - - return pa_json - - @classmethod - def _populate_vllm_output_json( - cls, - dataset_json: Dict, - system_role_headers: List[str], - user_role_headers: List[str], - text_input_headers: List[str], - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - pa_json = cls._create_empty_vllm_pa_json() - - for index, entry in enumerate(dataset_json["rows"]): - iter_model_name = cls._select_model_name( - model_name, index, model_selection_strategy - ) - pa_json["data"].append({"text_input": [""]}) - - for header, content in entry.items(): - new_text_input = cls._create_new_text_input( - header, - system_role_headers, - user_role_headers, - text_input_headers, - content, - ) - - pa_json = cls._add_new_text_input_to_json( - pa_json, index, new_text_input - ) - - pa_json = cls._add_optional_tags_to_vllm_json( - pa_json, - index, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - iter_model_name, - ) - - return pa_json - - @classmethod - def _populate_trtllm_output_json( - cls, - dataset_json: Dict, - system_role_headers: List[str], - user_role_headers: List[str], - text_input_headers: List[str], - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - 
output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: list = [], - model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN, - ) -> Dict: - pa_json = cls._create_empty_trtllm_pa_json() - default_max_tokens = ( - "max_tokens" not in extra_inputs - or output_tokens_mean != cls.DEFAULT_OUTPUT_TOKENS_MEAN - ) - - for index, entry in enumerate(dataset_json["rows"]): - iter_model_name = cls._select_model_name( - model_name, index, model_selection_strategy - ) - pa_json["data"].append({"text_input": [""]}) - - for header, content in entry.items(): - new_text_input = cls._create_new_text_input( - header, - system_role_headers, - user_role_headers, - text_input_headers, - content, - ) - - pa_json = cls._add_new_text_input_to_json( - pa_json, index, new_text_input - ) - - pa_json = cls._add_required_tags_to_trtllm_json( - pa_json, index, default_max_tokens - ) - pa_json = cls._add_optional_tags_to_trtllm_json( - pa_json, - index, - add_model_name, - add_stream, - extra_inputs, - output_tokens_mean, - output_tokens_stddev, - output_tokens_deterministic, - iter_model_name, - ) - - return pa_json - - @classmethod - def _create_empty_openai_pa_json(cls) -> Dict: - empty_pa_json = deepcopy(cls.EMPTY_JSON_IN_OPENAI_PA_FORMAT) - - return empty_pa_json - - @classmethod - def _create_empty_vllm_pa_json(cls) -> Dict: - empty_pa_json = deepcopy(cls.EMPTY_JSON_IN_VLLM_PA_FORMAT) - - return empty_pa_json - - @classmethod - def _create_empty_trtllm_pa_json(cls) -> Dict: - empty_pa_json = deepcopy(cls.EMPTY_JSON_IN_TENSORRTLLM_PA_FORMAT) - - return empty_pa_json - - @classmethod - def _create_new_openai_chat_completions_message( - cls, - header: str, - system_role_headers: List[str], - user_role_headers: List[str], - content: str, - ) -> Optional[Dict]: - # Do not add messages with blank content - if not content: - return {} - - if header in system_role_headers: - new_message = { - "role": "system", - "content": content, - } - elif header in user_role_headers: - new_message = { - "role": "user", - "content": content, - } - else: - new_message = {} - - return new_message - - @classmethod - def _create_new_prompt( - cls, - header: str, - system_role_headers: List[str], - user_role_headers: List[str], - text_input_headers: List[str], - content: str, - ) -> str: - new_prompt = "" - - if ( - header in system_role_headers - or header in user_role_headers - or header in text_input_headers - ): - new_prompt = content - - return new_prompt - - @classmethod - def _create_new_text_input( - cls, - header: str, - system_role_headers: List[str], - user_role_headers: List[str], - text_input_headers: List[str], - content: str, - ) -> str: - new_text_input = "" - - if ( - header in system_role_headers - or header in user_role_headers - or header in text_input_headers - ): - new_text_input = content - - return new_text_input - - @classmethod - def _add_new_message_to_json( - cls, pa_json: Dict, index: int, new_message: Optional[Dict] - ) -> Dict: - if new_message: - pa_json["data"][index]["payload"][0]["messages"].append(new_message) - - return pa_json - - @classmethod - def _add_new_text_input_to_json( - cls, pa_json: Dict, index: int, new_text_input: str - ) -> Dict: - if new_text_input: - if pa_json["data"][index]["text_input"][0]: - pa_json["data"][index]["text_input"][0] = ( - pa_json["data"][index]["text_input"][0] + f" {new_text_input}" - ) - else: - pa_json["data"][index]["text_input"][0] = new_text_input - - return pa_json - - @classmethod - 
def _add_new_prompt_to_json( - cls, - pa_json: Dict, - index: int, - new_prompt: str, - ) -> Dict: - if new_prompt: - if pa_json["data"][index]["payload"][0]["prompt"]: - pa_json["data"][index]["payload"][0]["prompt"] += f" {new_prompt}" - else: - pa_json["data"][index]["payload"][0]["prompt"] = new_prompt - - return pa_json - - @classmethod - def _add_optional_tags_to_openai_json( - cls, - pa_json: Dict, - index: int, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: str = "", - ) -> Dict: - row = pa_json["data"][index]["payload"][0] - if add_model_name: - row["model"] = model_name - if add_stream: - row["stream"] = True - if output_tokens_mean != cls.DEFAULT_OUTPUT_TOKENS_MEAN: - row["max_tokens"] = int( - random.gauss(output_tokens_mean, output_tokens_stddev) - ) - for key, value in extra_inputs.items(): - row[key] = value - - return pa_json - - @classmethod - def _add_optional_tags_to_vllm_json( - cls, - pa_json: Dict, - index: int, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: str = "", - ) -> Dict: - row = pa_json["data"][index] - if add_model_name: - row["model"] = model_name - if add_stream: - row["stream"] = [True] - if output_tokens_mean != cls.DEFAULT_OUTPUT_TOKENS_MEAN: - number_of_tokens = str( - int(max(0, random.gauss(output_tokens_mean, output_tokens_stddev))) - ) - sampling_parameters = { - "max_tokens": number_of_tokens, - } - if output_tokens_deterministic: - sampling_parameters["min_tokens"] = number_of_tokens - sampling_parameters_str = json.dumps(sampling_parameters) - row["sampling_parameters"] = [sampling_parameters_str] - for key, value in extra_inputs.items(): - row[key] = [value] - if "exclude_input_in_output" not in row: - row["exclude_input_in_output"] = [True] - - return pa_json - - @classmethod - def _add_optional_tags_to_trtllm_json( - cls, - pa_json: Dict, - index: int, - add_model_name: bool, - add_stream: bool, - extra_inputs: Dict, - output_tokens_mean: int, - output_tokens_stddev: int, - output_tokens_deterministic: bool, - model_name: str = "", - ) -> Dict: - row = pa_json["data"][index] - if add_model_name: - row["model"] = model_name - if add_stream: - row["stream"] = [True] - if output_tokens_mean != cls.DEFAULT_OUTPUT_TOKENS_MEAN: - number_of_tokens = int( - random.gauss(output_tokens_mean, output_tokens_stddev) - ) - if output_tokens_deterministic: - row["min_length"] = [number_of_tokens] - row["max_tokens"] = [number_of_tokens] - for key, value in extra_inputs.items(): - row[key] = [value] - - return pa_json - - @classmethod - def _add_required_tags_to_trtllm_json( - cls, - pa_json: Dict, - index: int, - default_max_tokens: bool, - ) -> Dict: - row = pa_json["data"][index] - if default_max_tokens: - row["max_tokens"] = [cls.DEFAULT_TENSORRTLLM_MAX_TOKENS] - - return pa_json - - @classmethod - def _check_for_dataset_name_if_input_type_is_url( - cls, input_type: PromptSource, dataset_name: str - ) -> None: - if input_type == PromptSource.DATASET and not dataset_name: - raise GenAIPerfException( - "Input type is dataset, but dataset_name is not specified." 
- ) - - @classmethod - def _check_for_tokenzier_if_input_type_is_synthetic( - cls, - input_type: PromptSource, - tokenizer: Tokenizer, - ) -> None: - if input_type == PromptSource.SYNTHETIC and not tokenizer: - raise GenAIPerfException( - "Input type is SYNTHETIC, but a tokenizer was not specified." - ) - - @classmethod - def _check_for_valid_starting_index(cls, starting_index: int) -> None: - if not isinstance(starting_index, int): - raise GenAIPerfException( - f"starting_index: {starting_index} must be an integer." - ) - - if starting_index < cls.MINIMUM_STARTING_INDEX: - raise GenAIPerfException( - f"starting_index: {starting_index} must be larger than {cls.MINIMUM_STARTING_INDEX}." - ) - - @classmethod - def _check_for_valid_length(cls, length: int) -> None: - if not isinstance(length, int): - raise GenAIPerfException(f"length: {length} must be an integer.") - - if length < cls.MINIMUM_LENGTH: - raise GenAIPerfException( - f"starting_index: {length} must be larger than {cls.MINIMUM_LENGTH}." - ) - - @classmethod - def _query_server(cls, configured_url: str) -> Response: - try: - response = requests.get(configured_url) - except Exception as e: - error_message = cls._create_error_message(e) - raise GenAIPerfException(error_message) - - return response - - @classmethod - def _create_error_message(cls, exception: Exception) -> str: - url_str = exception.args[0].args[0] - url_start = url_str.find("'") - url_end = url_str.find("'", url_start + 1) + 1 - error_message = f"Invalid URL: {url_str[url_start:url_end]}" - - return error_message - - @classmethod - def _check_for_error_in_json_of_dataset(cls, dataset_json: Dict) -> None: - if "error" in dataset_json: - raise GenAIPerfException(dataset_json["error"]) - - @classmethod - def _create_synthetic_prompt( - cls, - tokenizer: Tokenizer, - prompt_tokens_mean: int, - prompt_tokens_stddev: int, - ) -> str: - return SyntheticPromptGenerator.create_synthetic_prompt( - tokenizer, prompt_tokens_mean, prompt_tokens_stddev - ) - - @classmethod - def _create_synthetic_image( - cls, - image_width_mean: int, - image_width_stddev: int, - image_height_mean: int, - image_height_stddev: int, - image_format: ImageFormat, - ) -> str: - return SyntheticImageGenerator.create_synthetic_image( - image_width_mean=image_width_mean, - image_width_stddev=image_width_stddev, - image_height_mean=image_height_mean, - image_height_stddev=image_height_stddev, - image_format=image_format, - ) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/dlss.png b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/dlss.png deleted file mode 100644 index cdba23dd3..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/dlss.png and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/h100.jpeg b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/h100.jpeg deleted file mode 100644 index aee985fdc..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/h100.jpeg and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/h200.jpeg b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/h200.jpeg deleted file mode 100644 index eb0633b27..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/h200.jpeg and /dev/null differ diff --git 
a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/jensen.jpeg b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/jensen.jpeg deleted file mode 100644 index c9c831680..000000000 Binary files a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/source_images/jensen.jpeg and /dev/null differ diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/synthetic_image_generator.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/synthetic_image_generator.py deleted file mode 100644 index a2df14d87..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/synthetic_image_generator.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import glob -import random -from enum import Enum, auto -from pathlib import Path -from typing import Optional - -from genai_perf import utils -from PIL import Image - - -class ImageFormat(Enum): - PNG = auto() - JPEG = auto() - - -class SyntheticImageGenerator: - """A simple synthetic image generator that generates multiple synthetic - images from the source images. 
- """ - - @classmethod - def create_synthetic_image( - cls, - image_width_mean: int, - image_width_stddev: int, - image_height_mean: int, - image_height_stddev: int, - image_format: Optional[ImageFormat] = None, - ) -> str: - """Generate base64 encoded synthetic image using the source images.""" - if image_format is None: - image_format = random.choice(list(ImageFormat)) - width = cls._sample_random_positive_integer( - image_width_mean, image_width_stddev - ) - height = cls._sample_random_positive_integer( - image_height_mean, image_height_stddev - ) - - image = cls._sample_source_image() - image = image.resize(size=(width, height)) - - img_base64 = utils.encode_image(image, image_format.name) - return f"data:image/{image_format.name.lower()};base64,{img_base64}" - - @classmethod - def _sample_source_image(cls): - """Sample one image among the source images.""" - filepath = Path(__file__).parent.resolve() / "source_images" / "*" - filenames = glob.glob(str(filepath)) - return Image.open(random.choice(filenames)) - - @classmethod - def _sample_random_positive_integer(cls, mean: int, stddev: int) -> int: - n = int(abs(random.gauss(mean, stddev))) - return n if n != 0 else 1 # avoid zero diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/synthetic_prompt_generator.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/synthetic_prompt_generator.py deleted file mode 100644 index 68b77fdc4..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/synthetic_prompt_generator.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import itertools -import math -import pathlib -import random -import re -from typing import List - -from genai_perf.tokenizer import Tokenizer - - -class SyntheticPromptGenerator: - @classmethod - def create_synthetic_prompt( - cls, - tokenizer: Tokenizer, - prompt_tokens_mean: int = 550, - prompt_tokens_stddev: int = 250, - ) -> str: - """ - Generate a prompt that randomly samples lines from - Washington's farewell address at farewell.txt. - - Args: - prompt_tokens_mean: - The mean length of the prompt to generate - prompt_tokens_stddev: - The standard deviation of the length of the prompt to generate - - Returns: - The prompt. 
- """ - - num_prompt_tokens = SyntheticPromptGenerator._sample_random_positive_int( - prompt_tokens_mean, prompt_tokens_stddev - ) - - farewell_lines = SyntheticPromptGenerator._create_farewell_lines() - prompt = SyntheticPromptGenerator._create_prompt_from_lines( - num_prompt_tokens, farewell_lines, tokenizer - ) - - return prompt - - @classmethod - def _create_farewell_lines(cls) -> List[str]: - farewell_path = pathlib.Path(__file__).parent.resolve() / "farewell.txt" - with open(farewell_path, "r") as f: - farewell_lines = f.readlines() - random.shuffle(farewell_lines) - - return farewell_lines - - @classmethod - def _create_prompt_from_lines( - cls, - requested_prompt_tokens: int, - source_lines: List[str], - tokenizer: Tokenizer, - ) -> str: - get_token_length = lambda text: len(tokenizer.encode(text)) - - line_iterator = itertools.cycle(source_lines) - - def word_generator(): - while True: - next_line = next(line_iterator) - words = re.split("[ \n]+", next_line) - for word in words: - yield word - - word_iterator = word_generator() - - # Fast add lines - remaining_tokens = requested_prompt_tokens - prompt = "" - num_tokens_in_avg_line = get_token_length(source_lines[0] + source_lines[1]) / 2 - num_lines_to_add_fast = math.floor( - 0.5 * requested_prompt_tokens / num_tokens_in_avg_line - ) - while num_lines_to_add_fast: - for _ in range(num_lines_to_add_fast): - next_line = next(line_iterator) - prompt = prompt + next_line - - curr_tokens = get_token_length(prompt) - remaining_tokens = requested_prompt_tokens - curr_tokens - num_lines_to_add_fast = math.floor( - 0.5 * remaining_tokens / num_tokens_in_avg_line - ) - - # Fast add words - final_line = "" - while get_token_length(final_line) < remaining_tokens - 3: - next_word = next(word_iterator) - final_line += next_word + " " - prompt += final_line - - # Final tweaks - diff = requested_prompt_tokens - get_token_length(prompt) - for _ in range(diff): - prompt = "hi " + prompt - - return prompt - - @classmethod - def _sample_random_positive_int(cls, mean: int, stddev: int) -> int: - random_pos_int = -1 - while random_pos_int <= 0: - random_pos_int = int(random.gauss(mean, stddev)) - - return random_pos_int diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/logging.py b/src/c++/perf_analyzer/genai-perf/genai_perf/logging.py deleted file mode 100644 index f5cab490a..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/logging.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import logging -import logging.config - -DEFAULT_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s:%(lineno)s - %(message)s" -DEFAULT_DATE_FORMAT = "%Y-%m-%d %H:%M" - - -def init_logging() -> None: - LOGGING_CONFIG = { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "standard": { - "format": DEFAULT_LOG_FORMAT, - "datefmt": DEFAULT_DATE_FORMAT, - }, - }, - "handlers": { - "console": { - "level": "INFO", - "formatter": "standard", - "class": "logging.StreamHandler", - "stream": "ext://sys.stdout", # Default is stderr - }, - }, - "loggers": { - "": { # root logger - avoid using - "handlers": ["console"], - "level": "WARNING", - "propagate": False, - }, - "__main__": { # if __name__ == '__main__' - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - "genai_perf.parser": { # must use module name for loggers - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - "genai_perf.wrapper": { - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - "genai_perf.plots.plot_config_parser": { - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - "genai_perf.plots.plot_manager": { - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - "genai_perf.export_data.json_exporter": { - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - "genai_perf.export_data.csv_exporter": { - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - }, - } - logging.config.dictConfig(LOGGING_CONFIG) - - -def getLogger(name): - return logging.getLogger(name) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py deleted file mode 100755 index 9ff7b5b9a..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
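The logging setup removed here follows a standard dictConfig pattern: one stdout handler with a shared formatter, plus explicit per-module loggers that do not propagate to the root logger. A compact, self-contained sketch of that pattern is below; the logger name "my_tool.worker" is only an illustration, not a genai-perf module.

import logging
import logging.config

logging.config.dictConfig(
    {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "standard": {
                "format": "%(asctime)s [%(levelname)s] %(name)s - %(message)s",
                "datefmt": "%Y-%m-%d %H:%M",
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "level": "INFO",
                "formatter": "standard",
                "stream": "ext://sys.stdout",  # default would be stderr
            },
        },
        "loggers": {
            "my_tool.worker": {
                "handlers": ["console"],
                "level": "DEBUG",
                "propagate": False,
            },
        },
    }
)

logging.getLogger("my_tool.worker").info("worker initialized")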
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import sys -import traceback -from argparse import Namespace -from pathlib import Path - -import genai_perf.logging as logging -from genai_perf import parser -from genai_perf.exceptions import GenAIPerfException -from genai_perf.export_data.output_reporter import OutputReporter -from genai_perf.llm_inputs.llm_inputs import LlmInputs -from genai_perf.plots.plot_config_parser import PlotConfigParser -from genai_perf.plots.plot_manager import PlotManager -from genai_perf.profile_data_parser import LLMProfileDataParser, ProfileDataParser -from genai_perf.tokenizer import Tokenizer, get_tokenizer - - -def create_artifacts_dirs(args: Namespace) -> None: - plot_dir = args.artifact_dir / "plots" - os.makedirs(args.artifact_dir, exist_ok=True) - if hasattr(args, "generate_plots") and args.generate_plots: - os.makedirs(plot_dir, exist_ok=True) - - -def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None: - # TODO (TMA-1759): review if add_model_name is always true - if args.input_file: - filepath, _ = args.input_file - input_filename = Path(filepath) - else: - input_filename = None - add_model_name = True - try: - extra_input_dict = parser.get_extra_inputs_as_dict(args) - except ValueError as e: - raise GenAIPerfException(e) - - LlmInputs.create_llm_inputs( - input_type=args.prompt_source, - output_format=args.output_format, - dataset_name=args.input_dataset, - model_name=args.model, - model_selection_strategy=args.model_selection_strategy, - input_filename=input_filename, - starting_index=LlmInputs.DEFAULT_STARTING_INDEX, - length=args.num_prompts, - prompt_tokens_mean=args.synthetic_input_tokens_mean, - prompt_tokens_stddev=args.synthetic_input_tokens_stddev, - output_tokens_mean=args.output_tokens_mean, - output_tokens_stddev=args.output_tokens_stddev, - output_tokens_deterministic=args.output_tokens_mean_deterministic, - image_width_mean=args.image_width_mean, - image_width_stddev=args.image_width_stddev, - image_height_mean=args.image_height_mean, - image_height_stddev=args.image_height_stddev, - image_format=args.image_format, - random_seed=args.random_seed, - num_of_output_prompts=args.num_prompts, - add_model_name=add_model_name, - add_stream=args.streaming, - tokenizer=tokenizer, - extra_inputs=extra_input_dict, - batch_size=args.batch_size, - output_dir=args.artifact_dir, - ) - - -def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> ProfileDataParser: - if args.endpoint_type in ["embeddings", "rankings"]: - return ProfileDataParser(args.profile_export_file) - else: - return LLMProfileDataParser( - filename=args.profile_export_file, - tokenizer=tokenizer, - ) - - -def report_output(data_parser: ProfileDataParser, args: Namespace) -> None: - if args.concurrency: - infer_mode = "concurrency" - load_level = f"{args.concurrency}" - elif args.request_rate: - infer_mode = "request_rate" - load_level = f"{args.request_rate}" - else: - raise GenAIPerfException("No valid infer mode specified") - - stats = 
data_parser.get_statistics(infer_mode, load_level) - reporter = OutputReporter(stats, args) - reporter.report_output() - if args.generate_plots: - create_plots(args) - - -def create_plots(args: Namespace) -> None: - # TMA-1911: support plots CLI option - plot_dir = args.artifact_dir / "plots" - PlotConfigParser.create_init_yaml_config( - filenames=[args.profile_export_file], # single run - output_dir=plot_dir, - ) - config_parser = PlotConfigParser(plot_dir / "config.yaml") - plot_configs = config_parser.generate_configs() - plot_manager = PlotManager(plot_configs) - plot_manager.generate_plots() - - -# Separate function that can raise exceptions used for testing -# to assert correct errors and messages. -def run(): - try: - # TMA-1900: refactor CLI handler - logging.init_logging() - args, extra_args = parser.parse_args() - if args.subcommand == "compare": - args.func(args) - else: - create_artifacts_dirs(args) - tokenizer = get_tokenizer(args.tokenizer) - generate_inputs(args, tokenizer) - args.func(args, extra_args) - data_parser = calculate_metrics(args, tokenizer) - report_output(data_parser, args) - except Exception as e: - raise GenAIPerfException(e) - - -def main(): - # Interactive use will catch exceptions and log formatted errors rather than - # tracebacks. - try: - run() - except Exception as e: - traceback.print_exc() - logger = logging.getLogger(__name__) - logger.error(e) - return 1 - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/__init__.py b/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/__init__.py deleted file mode 100644 index 01ca53c59..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
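The entrypoint removed above keeps a deliberate split between run(), which does the work and raises on failure, and main(), which prints the traceback, reports a short error, and converts the outcome into a process exit code. A self-contained sketch of that convention, with a stand-in failure in place of the real profiling pipeline:

import sys
import traceback

def run() -> None:
    # Stand-in for parse -> generate inputs -> profile -> report; it simply
    # fails so the error path below is exercised.
    raise RuntimeError("no valid infer mode specified")

def main() -> int:
    try:
        run()
    except Exception as err:
        traceback.print_exc()
        print(f"error: {err}", file=sys.stderr)
        return 1
    return 0

if __name__ == "__main__":
    sys.exit(main())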
- -from genai_perf.metrics.llm_metrics import LLMMetrics -from genai_perf.metrics.metrics import MetricMetadata, Metrics -from genai_perf.metrics.statistics import Statistics diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/llm_metrics.py b/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/llm_metrics.py deleted file mode 100755 index 13dff8a63..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/llm_metrics.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from typing import List - -from genai_perf.metrics.metrics import MetricMetadata, Metrics - - -class LLMMetrics(Metrics): - """A simple dataclass that holds core LLM performance metrics.""" - - LLM_REQUEST_METRICS = [ - MetricMetadata("time_to_first_token", "ms"), - MetricMetadata("inter_token_latency", "ms"), - MetricMetadata("output_token_throughput_per_request", "tokens/sec"), - MetricMetadata("output_sequence_length", "tokens"), - MetricMetadata("input_sequence_length", "tokens"), - ] - - LLM_SYSTEM_METRICS = [ - # (TMA-1977) Make the unit consistent with statistics dict (e.g. 
tokens/sec) - MetricMetadata("output_token_throughput", "per sec"), - ] - - def __init__( - self, - request_throughputs: List[float] = [], - request_latencies: List[int] = [], - time_to_first_tokens: List[int] = [], - inter_token_latencies: List[int] = [], - output_token_throughputs: List[float] = [], - output_token_throughputs_per_request: List[int] = [], - output_sequence_lengths: List[int] = [], - input_sequence_lengths: List[int] = [], - chunked_inter_token_latencies: List[List[int]] = [[]], - ) -> None: - super().__init__(request_throughputs, request_latencies) - self.time_to_first_tokens = time_to_first_tokens - self.inter_token_latencies = inter_token_latencies - self.output_token_throughputs = output_token_throughputs - self.output_token_throughputs_per_request = output_token_throughputs_per_request - self.output_sequence_lengths = output_sequence_lengths - self.input_sequence_lengths = input_sequence_lengths - - # Keeping chunked ITL (old) as a WAR to preserve visualization. - # Excluded from data. - self._chunked_inter_token_latencies = chunked_inter_token_latencies - - # add base name mapping - self._base_names["time_to_first_tokens"] = "time_to_first_token" - self._base_names["inter_token_latencies"] = "inter_token_latency" - self._base_names["output_token_throughputs"] = "output_token_throughput" - self._base_names["output_token_throughputs_per_request"] = ( - "output_token_throughput_per_request" - ) - self._base_names["output_sequence_lengths"] = "output_sequence_length" - self._base_names["input_sequence_lengths"] = "input_sequence_length" - - @property - def request_metrics(self) -> List[MetricMetadata]: - base_metrics = super().request_metrics # base metrics - - # (TMA-1975) The order is hardcoded as below to avoid introducing any - # breaking changes to the users who might be parsing the outputs. However, - # we would eventually want to impose some consistent order such as a - # base metrics first and then task specific metrics. Uncomment the below - # line to enable this order: - # return base_metrics + self.LLM_REQUEST_METRICS - return ( - self.LLM_REQUEST_METRICS[:2] + base_metrics + self.LLM_REQUEST_METRICS[2:] - ) - - @property - def system_metrics(self) -> List[MetricMetadata]: - base_metrics = super().system_metrics # base metrics - - # (TMA-1975) The order is hardcoded as below to avoid introducing any - # breaking changes to the users who might be parsing the outputs. However, - # we would eventually want to impose some consistent order such as a - # base metrics first and then task specific metrics. Uncomment the below - # line to enable this order: - # return base_metrics + self.LLM_SYSTEM_METRICS - return self.LLM_SYSTEM_METRICS + base_metrics diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/metrics.py b/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/metrics.py deleted file mode 100755 index 7e047094d..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/metrics.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from dataclasses import dataclass -from typing import List - - -@dataclass -class MetricMetadata: - name: str - unit: str - - -class Metrics: - """A base class that contains common request level metrics.""" - - REQUEST_METRICS = [ - MetricMetadata("request_latency", "ms"), - ] - - SYSTEM_METRICS = [ - # (TMA-1977) Make the unit consistent with statistics dict (e.g. tokens/sec) - MetricMetadata("request_throughput", "per sec"), - ] - - def __init__( - self, - request_throughputs: List[float] = [], - request_latencies: List[int] = [], - ) -> None: - self.request_throughputs = request_throughputs - self.request_latencies = request_latencies - self._base_names = { - "request_throughputs": "request_throughput", - "request_latencies": "request_latency", - } - - def __repr__(self): - attr_strs = [] - for k, v in self.__dict__.items(): - if not k.startswith("_"): - attr_strs.append(f"{k}={v}") - return f"Metrics({','.join(attr_strs)})" - - @property - def request_metrics(self) -> List[MetricMetadata]: - return self.REQUEST_METRICS - - @property - def system_metrics(self) -> List[MetricMetadata]: - return self.SYSTEM_METRICS - - @property - def data(self) -> dict: - """Returns all the metrics.""" - return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} - - def get_base_name(self, metric_name: str) -> str: - """Returns singular name of a given metric.""" - if metric_name in self._base_names: - return self._base_names[metric_name] - else: - raise KeyError(f"No metric named '{metric_name}' exists.") diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/statistics.py b/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/statistics.py deleted file mode 100755 index f0d12cef6..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/metrics/statistics.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from collections import defaultdict -from pathlib import Path -from typing import Dict, List, Union - -import numpy as np -import pandas as pd -from genai_perf.metrics.metrics import Metrics - - -class Statistics: - """A class that aggregates various statistics from given metrics class. - - The Statistics class goes through each metric in the metrics class and - calculates several statistics such as: - - average (arithmetic mean) - - percentiles (p25, p50, p75, p90, p95, p99) - - minimum & maximum - - standard deviation - The class will store each calculated statistics as part of its attribute. 
- - Example: - - >>> metrics = LLMMetrics(request_throughputs=[2, 4]) - >>> stats = Statistics(metrics) - >>> print(stats.avg_request_throughput) # output: 3 - """ - - def __init__(self, metrics: Metrics): - # iterate through Metrics to calculate statistics and set attributes - self._metrics = metrics - self._stats_dict: Dict = defaultdict(dict) - for attr, data in metrics.data.items(): - if self._should_skip(data, attr): - continue - - attr = metrics.get_base_name(attr) - self._add_units(attr) - self._calculate_mean(data, attr) - if not self._is_system_metric(metrics, attr): - self._calculate_percentiles(data, attr) - self._calculate_minmax(data, attr) - self._calculate_std(data, attr) - - def _should_skip(self, data: List[Union[int, float]], attr: str) -> bool: - """Checks if some metrics should be skipped.""" - # No data points - if len(data) == 0: - return True - # Skip ITL when non-streaming (all zero) - elif attr == "inter_token_latencies" and sum(data) == 0: - return True - return False - - def _calculate_mean(self, data: List[Union[int, float]], attr: str) -> None: - avg = np.mean(data) - setattr(self, "avg_" + attr, avg) - self._stats_dict[attr]["avg"] = float(avg) - - def _calculate_percentiles(self, data: List[Union[int, float]], attr: str) -> None: - p25, p50, p75 = np.percentile(data, [25, 50, 75]) - p90, p95, p99 = np.percentile(data, [90, 95, 99]) - setattr(self, "p25_" + attr, p25) - setattr(self, "p50_" + attr, p50) - setattr(self, "p75_" + attr, p75) - setattr(self, "p90_" + attr, p90) - setattr(self, "p95_" + attr, p95) - setattr(self, "p99_" + attr, p99) - self._stats_dict[attr]["p99"] = float(p99) - self._stats_dict[attr]["p95"] = float(p95) - self._stats_dict[attr]["p90"] = float(p90) - self._stats_dict[attr]["p75"] = float(p75) - self._stats_dict[attr]["p50"] = float(p50) - self._stats_dict[attr]["p25"] = float(p25) - - def _calculate_minmax(self, data: List[Union[int, float]], attr: str) -> None: - min, max = np.min(data), np.max(data) - setattr(self, "min_" + attr, min) - setattr(self, "max_" + attr, max) - self._stats_dict[attr]["max"] = float(max) - self._stats_dict[attr]["min"] = float(min) - - def _calculate_std(self, data: List[Union[int, float]], attr: str) -> None: - std = np.std(data) - setattr(self, "std_" + attr, std) - self._stats_dict[attr]["std"] = float(std) - - def scale_data(self, factor: float = 1 / 1e6) -> None: - for k1, v1 in self.stats_dict.items(): - if self._is_time_metric(k1): - for k2, v2 in v1.items(): - if k2 != "unit": - self.stats_dict[k1][k2] = self._scale(v2, factor) - - def _scale(self, metric: float, factor: float = 1 / 1e6) -> float: - """ - Scale metrics from nanoseconds by factor. - Default is nanoseconds to milliseconds. 
- """ - return metric * factor - - def _add_units(self, key) -> None: - if self._is_time_metric(key): - self._stats_dict[key]["unit"] = "ms" - elif key == "request_throughput": - self._stats_dict[key]["unit"] = "requests/sec" - elif key.startswith("output_token_throughput"): - self._stats_dict[key]["unit"] = "tokens/sec" - elif "sequence_length" in key: - self._stats_dict[key]["unit"] = "tokens" - else: - self._stats_dict[key]["unit"] = "" - - def __repr__(self) -> str: - attr_strs = [] - for k, v in self.__dict__.items(): - if not k.startswith("_"): - attr_strs.append(f"{k}={v}") - return f"Statistics({','.join(attr_strs)})" - - @property - def data(self) -> dict: - """Return all the aggregated statistics.""" - return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} - - @property - def metrics(self) -> Metrics: - """Return the underlying metrics used to calculate the statistics.""" - return self._metrics - - @property - def stats_dict(self) -> Dict: - return self._stats_dict - - def _is_system_metric(self, metrics: Metrics, attr: str) -> bool: - return attr in [m.name for m in metrics.system_metrics] - - def _is_time_metric(self, field: str) -> bool: - # TPA-188: Remove the hardcoded time metrics list - time_metrics = [ - "inter_token_latency", - "time_to_first_token", - "request_latency", - ] - return field in time_metrics - - def export_parquet(self, artifact_dir: Path, filename: str) -> None: - max_length = -1 - col_index = 0 - filler_list = [] - df = pd.DataFrame() - - # Data frames require all columns of the same length - # find the max length column - for key, value in self._metrics.data.items(): - max_length = max(max_length, len(value)) - - # Insert None for shorter columns to match longest column - for key, value in self._metrics.data.items(): - if len(value) < max_length: - diff = max_length - len(value) - filler_list = [None] * diff - df.insert(col_index, key, value + filler_list) - diff = 0 - filler_list = [] - col_index = col_index + 1 - - filepath = artifact_dir / f"{filename}.gzip" - df.to_parquet(filepath, compression="gzip") diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py deleted file mode 100644 index 776535d15..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ /dev/null @@ -1,834 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import argparse -import json -import os -import sys -from enum import Enum, auto -from pathlib import Path -from typing import Tuple - -import genai_perf.logging as logging -import genai_perf.utils as utils -from genai_perf.constants import ( - CNN_DAILY_MAIL, - DEFAULT_ARTIFACT_DIR, - DEFAULT_COMPARE_DIR, - OPEN_ORCA, -) -from genai_perf.llm_inputs.llm_inputs import ( - LlmInputs, - ModelSelectionStrategy, - OutputFormat, - PromptSource, -) -from genai_perf.llm_inputs.synthetic_image_generator import ImageFormat -from genai_perf.plots.plot_config_parser import PlotConfigParser -from genai_perf.plots.plot_manager import PlotManager -from genai_perf.tokenizer import DEFAULT_TOKENIZER - -from . import __version__ - - -class PathType(Enum): - FILE = auto() - DIRECTORY = auto() - - def to_lowercase(self): - return self.name.lower() - - -class Subcommand(Enum): - PROFILE = auto() - COMPARE = auto() - - def to_lowercase(self): - return self.name.lower() - - -logger = logging.getLogger(__name__) - -_endpoint_type_map = { - "chat": "v1/chat/completions", - "completions": "v1/completions", - "embeddings": "v1/embeddings", - "rankings": "v1/ranking", - "vision": "v1/chat/completions", -} - - -def _check_model_args( - parser: argparse.ArgumentParser, args: argparse.Namespace -) -> argparse.Namespace: - """ - Check if model name is provided. - """ - if not args.model: - parser.error("The -m/--model option is required and cannot be empty.") - args = _convert_str_to_enum_entry( - args, "model_selection_strategy", ModelSelectionStrategy - ) - _generate_formatted_model_name(args) - return args - - -def _generate_formatted_model_name(args: argparse.Namespace) -> None: - if len(args.model) == 1: - args.formatted_model_name = args.model[0] - elif len(args.model) == 0: - args.model = None - args.formatted_model_name = None - else: - args.formatted_model_name = args.model[0] + "_multi" - - -def _check_compare_args( - parser: argparse.ArgumentParser, args: argparse.Namespace -) -> argparse.Namespace: - """ - Check compare subcommand args - """ - if not args.config and not args.files: - parser.error("Either the --config or --files option must be specified.") - return args - - -def _check_image_input_args( - parser: argparse.ArgumentParser, args: argparse.Namespace -) -> argparse.Namespace: - """ - Sanity check the image input args - """ - if args.image_width_mean <= 0 or args.image_height_mean <= 0: - parser.error( - "Both --image-width-mean and --image-height-mean values must be positive." - ) - if args.image_width_stddev < 0 or args.image_height_stddev < 0: - parser.error( - "Both --image-width-stddev and --image-height-stddev values must be non-negative." - ) - - args = _convert_str_to_enum_entry(args, "image_format", ImageFormat) - return args - - -def _check_conditional_args( - parser: argparse.ArgumentParser, args: argparse.Namespace -) -> argparse.Namespace: - """ - Check for conditional args and raise an error if they are not set. 
- """ - - # Endpoint and output format checks - if args.service_kind == "openai": - if args.endpoint_type is None: - parser.error( - "The --endpoint-type option is required when using the 'openai' service-kind." - ) - else: - if args.endpoint_type == "chat": - args.output_format = OutputFormat.OPENAI_CHAT_COMPLETIONS - elif args.endpoint_type == "completions": - args.output_format = OutputFormat.OPENAI_COMPLETIONS - elif args.endpoint_type == "embeddings": - args.output_format = OutputFormat.OPENAI_EMBEDDINGS - elif args.endpoint_type == "rankings": - args.output_format = OutputFormat.RANKINGS - - # (TMA-1986) deduce vision format from chat completions + image CLI - # because there's no openai vision endpoint. - elif args.endpoint_type == "vision": - args.output_format = OutputFormat.OPENAI_VISION - - if args.endpoint is not None: - args.endpoint = args.endpoint.lstrip(" /") - else: - args.endpoint = _endpoint_type_map[args.endpoint_type] - elif args.endpoint_type is not None: - parser.error( - "The --endpoint-type option should only be used when using the 'openai' service-kind." - ) - - if args.service_kind == "triton": - args = _convert_str_to_enum_entry(args, "backend", OutputFormat) - args.output_format = args.backend - - # Output token distribution checks - if args.output_tokens_mean == LlmInputs.DEFAULT_OUTPUT_TOKENS_MEAN: - if args.output_tokens_stddev != LlmInputs.DEFAULT_OUTPUT_TOKENS_STDDEV: - parser.error( - "The --output-tokens-mean option is required when using --output-tokens-stddev." - ) - if args.output_tokens_mean_deterministic: - parser.error( - "The --output-tokens-mean option is required when using --output-tokens-mean-deterministic." - ) - - if args.service_kind != "triton": - if args.output_tokens_mean_deterministic: - parser.error( - "The --output-tokens-mean-deterministic option is only supported with the Triton service-kind." - ) - - _check_conditional_args_embeddings_rankings(parser, args) - - return args - - -def _check_conditional_args_embeddings_rankings( - parser: argparse.ArgumentParser, args: argparse.Namespace -): - - if args.output_format in [ - OutputFormat.OPENAI_EMBEDDINGS, - OutputFormat.RANKINGS, - ]: - if args.streaming: - parser.error( - f"The --streaming option is not supported with the {args.endpoint_type} endpoint type." - ) - - if args.generate_plots: - parser.error( - f"The --generate-plots option is not currently supported with the {args.endpoint_type} endpoint type." - ) - else: - if args.batch_size != LlmInputs.DEFAULT_BATCH_SIZE: - parser.error( - "The --batch-size option is currently only supported with the embeddings and rankings endpoint types." - ) - - if args.input_file: - _, path_type = args.input_file - if args.output_format != OutputFormat.RANKINGS: - if path_type == "directory": - parser.error( - "A directory is only currently supported for the rankings endpoint type." - ) - else: - if path_type == PathType.FILE: - parser.error( - "The rankings endpoint-type requires a directory value for the --input-file flag." - ) - - -def _check_load_manager_args(args: argparse.Namespace) -> argparse.Namespace: - """ - Check inference load args - """ - # If no concurrency or request rate is set, default to 1 - if not args.concurrency and not args.request_rate: - args.concurrency = 1 - return args - - -def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace: - """ - Set paths for all the artifacts. 
- """ - if args.artifact_dir == Path(DEFAULT_ARTIFACT_DIR): - # Preprocess Huggingface model names that include '/' in their model name. - if (args.formatted_model_name is not None) and ( - "/" in args.formatted_model_name - ): - filtered_name = "_".join(args.formatted_model_name.split("/")) - logger.info( - f"Model name '{args.formatted_model_name}' cannot be used to create artifact " - f"directory. Instead, '{filtered_name}' will be used." - ) - name = [f"{filtered_name}"] - else: - name = [f"{args.formatted_model_name}"] - - if args.service_kind == "openai": - name += [f"{args.service_kind}-{args.endpoint_type}"] - elif args.service_kind == "triton": - name += [f"{args.service_kind}-{args.backend.to_lowercase()}"] - else: - raise ValueError(f"Unknown service kind '{args.service_kind}'.") - - if args.concurrency: - name += [f"concurrency{args.concurrency}"] - elif args.request_rate: - name += [f"request_rate{args.request_rate}"] - args.artifact_dir = args.artifact_dir / Path("-".join(name)) - - if args.profile_export_file.parent != Path(""): - raise ValueError( - "Please use --artifact-dir option to define intermediary paths to " - "the profile export file." - ) - - args.profile_export_file = args.artifact_dir / args.profile_export_file - return args - - -def _infer_prompt_source(args: argparse.Namespace) -> argparse.Namespace: - if args.input_dataset: - args.prompt_source = PromptSource.DATASET - logger.debug(f"Input source is the following dataset: {args.input_dataset}") - elif args.input_file: - args.prompt_source = PromptSource.FILE - if args.endpoint_type == "rankings": - logger.debug( - f"Input source is the following directory: {args.input_file[0]}" - ) - else: - logger.debug(f"Input source is the following file: {args.input_file[0]}") - else: - args.prompt_source = PromptSource.SYNTHETIC - logger.debug("Input source is synthetic data") - return args - - -def _convert_str_to_enum_entry(args, option, enum): - """ - Convert string option to corresponding enum entry - """ - attr_val = getattr(args, option) - if attr_val is not None: - setattr(args, f"{option}", utils.get_enum_entry(attr_val, enum)) - return args - - -### Types ### - - -def file_or_directory(path: str) -> Tuple[Path, PathType]: - if os.path.isfile(path): - return (Path(path), PathType.FILE) - elif os.path.isdir(path): - return (Path(path), PathType.DIRECTORY) - else: - raise ValueError(f"'{path}' is not a valid file or directory") - - -### Parsers ### - - -def _add_input_args(parser): - input_group = parser.add_argument_group("Input") - - input_group.add_argument( - "--batch-size", - "-b", - type=int, - default=LlmInputs.DEFAULT_BATCH_SIZE, - required=False, - help=f"The batch size of the requests GenAI-Perf should send. " - "This is currently only supported with the embeddings and rankings endpoint types.", - ) - - input_group.add_argument( - "--extra-inputs", - action="append", - help="Provide additional inputs to include with every request. " - "You can repeat this flag for multiple inputs. Inputs should be in an input_name:value format." 
- "Alternatively, a string representing a json formatted dict can be provided.", - ) - - prompt_source_group = input_group.add_mutually_exclusive_group(required=False) - prompt_source_group.add_argument( - "--input-dataset", - type=str.lower, - default=None, - choices=[OPEN_ORCA, CNN_DAILY_MAIL], - required=False, - help="The HuggingFace dataset to use for prompts.", - ) - - prompt_source_group.add_argument( - "--input-file", - type=file_or_directory, - default=None, - required=False, - help="The input file containing the prompts to use for profiling. " - "Each line should be a JSON object with a 'text_input' field in JSONL format. " - 'Example: {"text_input": "Your prompt here"}' - "For the rankings endpoint-type, a directory should be passed in instead with " - 'a "queries.jsonl" file and a "passages.jsonl" file with the same format.', - ) - - input_group.add_argument( - "--num-prompts", - type=int, - default=LlmInputs.DEFAULT_NUM_PROMPTS, - required=False, - help=f"The number of unique prompts to generate as stimulus.", - ) - - input_group.add_argument( - "--output-tokens-mean", - type=int, - default=LlmInputs.DEFAULT_OUTPUT_TOKENS_MEAN, - required=False, - help=f"The mean number of tokens in each output. " - "Ensure the --tokenizer value is set correctly. ", - ) - - input_group.add_argument( - "--output-tokens-mean-deterministic", - action="store_true", - required=False, - help=f"When using --output-tokens-mean, this flag can be set to " - "improve precision by setting the minimum number of tokens " - "equal to the requested number of tokens. This is currently " - "supported with the Triton service-kind. " - "Note that there is still some variability in the requested number " - "of output tokens, but GenAi-Perf attempts its best effort with your " - "model to get the right number of output tokens. ", - ) - - input_group.add_argument( - "--output-tokens-stddev", - type=int, - default=LlmInputs.DEFAULT_OUTPUT_TOKENS_STDDEV, - required=False, - help=f"The standard deviation of the number of tokens in each output. 
" - "This is only used when --output-tokens-mean is provided.", - ) - - input_group.add_argument( - "--random-seed", - type=int, - default=LlmInputs.DEFAULT_RANDOM_SEED, - required=False, - help="The seed used to generate random values.", - ) - - input_group.add_argument( - "--synthetic-input-tokens-mean", - type=int, - default=LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, - required=False, - help=f"The mean of number of tokens in the generated prompts when using synthetic data.", - ) - - input_group.add_argument( - "--synthetic-input-tokens-stddev", - type=int, - default=LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV, - required=False, - help=f"The standard deviation of number of tokens in the generated prompts when using synthetic data.", - ) - - -def _add_image_input_args(parser): - input_group = parser.add_argument_group("Image Input") - - input_group.add_argument( - "--image-width-mean", - type=int, - default=LlmInputs.DEFAULT_IMAGE_WIDTH_MEAN, - required=False, - help=f"The mean width of images when generating synthetic image data.", - ) - - input_group.add_argument( - "--image-width-stddev", - type=int, - default=LlmInputs.DEFAULT_IMAGE_WIDTH_STDDEV, - required=False, - help=f"The standard deviation of width of images when generating synthetic image data.", - ) - - input_group.add_argument( - "--image-height-mean", - type=int, - default=LlmInputs.DEFAULT_IMAGE_HEIGHT_MEAN, - required=False, - help=f"The mean height of images when generating synthetic image data.", - ) - - input_group.add_argument( - "--image-height-stddev", - type=int, - default=LlmInputs.DEFAULT_IMAGE_HEIGHT_STDDEV, - required=False, - help=f"The standard deviation of height of images when generating synthetic image data.", - ) - - input_group.add_argument( - "--image-format", - type=str, - choices=utils.get_enum_names(ImageFormat), - required=False, - help=f"The compression format of the images. " - "If format is not selected, format of generated image is selected at random", - ) - - -def _add_profile_args(parser): - profile_group = parser.add_argument_group("Profiling") - load_management_group = profile_group.add_mutually_exclusive_group(required=False) - - load_management_group.add_argument( - "--concurrency", - type=int, - required=False, - help="The concurrency value to benchmark.", - ) - - profile_group.add_argument( - "--measurement-interval", - "-p", - type=int, - default="10000", - required=False, - help="The time interval used for each measurement in milliseconds. " - "Perf Analyzer will sample a time interval specified and take " - "measurement over the requests completed within that time interval.", - ) - - load_management_group.add_argument( - "--request-rate", - type=float, - required=False, - help="Sets the request rate for the load generated by PA.", - ) - - profile_group.add_argument( - "-s", - "--stability-percentage", - type=float, - default=999, - required=False, - help="The allowed variation in " - "latency measurements when determining if a result is stable. 
The " - "measurement is considered as stable if the ratio of max / min " - "from the recent 3 measurements is within (stability percentage) " - "in terms of both infer per second and latency.", - ) - - -def _add_endpoint_args(parser): - endpoint_group = parser.add_argument_group("Endpoint") - - endpoint_group.add_argument( - "-m", - "--model", - nargs="+", - default=[], - help=f"The name of the model(s) to benchmark.", - ) - endpoint_group.add_argument( - "--model-selection-strategy", - type=str, - choices=utils.get_enum_names(ModelSelectionStrategy), - default="round_robin", - required=False, - help=f"When multiple model are specified, this is how a specific model " - "should be assigned to a prompt. round_robin means that ith prompt in the " - "list gets assigned to i mod len(models). random means that assignment is " - "uniformly random", - ) - - endpoint_group.add_argument( - "--backend", - type=str, - choices=utils.get_enum_names(OutputFormat)[2:], - default="tensorrtllm", - required=False, - help=f'When using the "triton" service-kind, ' - "this is the backend of the model. " - "For the TENSORRT-LLM backend, you currently must set " - "'exclude_input_in_output' to true in the model config to " - "not echo the input tokens in the output.", - ) - - endpoint_group.add_argument( - "--endpoint", - type=str, - required=False, - help=f"Set a custom endpoint that differs from the OpenAI defaults.", - ) - - endpoint_group.add_argument( - "--endpoint-type", - type=str, - choices=["chat", "completions", "embeddings", "rankings", "vision"], - required=False, - help=f"The endpoint-type to send requests to on the " - 'server. This is only used with the "openai" service-kind.', - ) - - endpoint_group.add_argument( - "--service-kind", - type=str, - choices=["triton", "openai"], - default="triton", - required=False, - help="The kind of service perf_analyzer will " - 'generate load for. In order to use "openai", ' - "you must specify an api via --endpoint-type.", - ) - - endpoint_group.add_argument( - "--streaming", - action="store_true", - required=False, - help=f"An option to enable the use of the streaming API.", - ) - - endpoint_group.add_argument( - "-u", - "--url", - type=str, - required=False, - dest="u", - metavar="URL", - help="URL of the endpoint to target for benchmarking.", - ) - - -def _add_output_args(parser): - output_group = parser.add_argument_group("Output") - output_group.add_argument( - "--artifact-dir", - type=Path, - default=Path(DEFAULT_ARTIFACT_DIR), - help="The directory to store all the (output) artifacts generated by " - "GenAI-Perf and Perf Analyzer.", - ) - output_group.add_argument( - "--generate-plots", - action="store_true", - required=False, - help="An option to enable the generation of plots.", - ) - output_group.add_argument( - "--profile-export-file", - type=Path, - default=Path("profile_export.json"), - help="The path where the perf_analyzer profile export will be " - "generated. By default, the profile export will be to profile_export.json. " - "The genai-perf file will be exported to _genai_perf.csv. 
" - "For example, if the profile export file is profile_export.json, the genai-perf file will be " - "exported to profile_export_genai_perf.csv.", - ) - - -def _add_other_args(parser): - other_group = parser.add_argument_group("Other") - - other_group.add_argument( - "--tokenizer", - type=str, - default=DEFAULT_TOKENIZER, - required=False, - help="The HuggingFace tokenizer to use to interpret token metrics from prompts and responses.", - ) - - other_group.add_argument( - "-v", - "--verbose", - action="store_true", - required=False, - help="An option to enable verbose mode.", - ) - - -def get_extra_inputs_as_dict(args: argparse.Namespace) -> dict: - request_inputs = {} - if args.extra_inputs: - for input_str in args.extra_inputs: - if input_str.startswith("{") and input_str.endswith("}"): - request_inputs.update(utils.load_json_str(input_str)) - else: - semicolon_count = input_str.count(":") - if semicolon_count != 1: - raise ValueError( - f"Invalid input format for --extra-inputs: {input_str}\n" - "Expected input format: 'input_name:value'" - ) - input_name, value = input_str.split(":", 1) - - if not input_name or not value: - raise ValueError( - f"Input name or value is empty in --extra-inputs: {input_str}\n" - "Expected input format: 'input_name:value'" - ) - - is_bool = value.lower() in ["true", "false"] - is_int = value.isdigit() - is_float = value.count(".") == 1 and ( - value[0] == "." or value.replace(".", "").isdigit() - ) - - if is_bool: - value = value.lower() == "true" - elif is_int: - value = int(value) - elif is_float: - value = float(value) - - if input_name in request_inputs: - raise ValueError( - f"Input name already exists in request_inputs dictionary: {input_name}" - ) - request_inputs[input_name] = value - - return request_inputs - - -def _parse_compare_args(subparsers) -> argparse.ArgumentParser: - compare = subparsers.add_parser( - Subcommand.COMPARE.to_lowercase(), - description="Subcommand to generate plots that compare multiple profile runs.", - ) - compare_group = compare.add_argument_group("Input") - mx_group = compare_group.add_mutually_exclusive_group(required=False) - mx_group.add_argument( - "--config", - type=Path, - default=None, - help="The path to the YAML file that specifies plot configurations for " - "comparing multiple runs.", - ) - mx_group.add_argument( - "-f", - "--files", - nargs="+", - default=[], - help="List of paths to the profile export JSON files. 
Users can specify " - "this option instead of the `--config` option if they would like " - "GenAI-Perf to generate default plots as well as initial YAML config file.", - ) - compare.set_defaults(func=compare_handler) - return compare - - -def _parse_profile_args(subparsers) -> argparse.ArgumentParser: - profile = subparsers.add_parser( - Subcommand.PROFILE.to_lowercase(), - description="Subcommand to profile LLMs and Generative AI models.", - ) - _add_endpoint_args(profile) - _add_input_args(profile) - _add_image_input_args(profile) - _add_profile_args(profile) - _add_output_args(profile) - _add_other_args(profile) - profile.set_defaults(func=profile_handler) - return profile - - -### Handlers ### - - -def create_compare_dir() -> None: - if not os.path.exists(DEFAULT_COMPARE_DIR): - os.mkdir(DEFAULT_COMPARE_DIR) - - -def compare_handler(args: argparse.Namespace): - """Handles `compare` subcommand workflow.""" - if args.files: - create_compare_dir() - output_dir = Path(f"{DEFAULT_COMPARE_DIR}") - PlotConfigParser.create_init_yaml_config(args.files, output_dir) - args.config = output_dir / "config.yaml" - - config_parser = PlotConfigParser(args.config) - plot_configs = config_parser.generate_configs() - plot_manager = PlotManager(plot_configs) - plot_manager.generate_plots() - - -def profile_handler(args, extra_args): - from genai_perf.wrapper import Profiler - - Profiler.run(args=args, extra_args=extra_args) - - -### Parser Initialization ### - - -def init_parsers(): - parser = argparse.ArgumentParser( - prog="genai-perf", - description="CLI to profile LLMs and Generative AI models with Perf Analyzer", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "--version", - action="version", - version="%(prog)s " + __version__, - help=f"An option to print the version and exit.", - ) - - # Add subcommands - subparsers = parser.add_subparsers( - help="List of subparser commands.", dest="subcommand" - ) - _ = _parse_compare_args(subparsers) - _ = _parse_profile_args(subparsers) - subparsers.required = True - - return parser - - -def get_passthrough_args_index(argv: list) -> int: - if "--" in argv: - passthrough_index = argv.index("--") - logger.info(f"Detected passthrough args: {argv[passthrough_index + 1:]}") - else: - passthrough_index = len(argv) - - return passthrough_index - - -def refine_args( - parser: argparse.ArgumentParser, args: argparse.Namespace -) -> argparse.Namespace: - if args.subcommand == Subcommand.PROFILE.to_lowercase(): - args = _infer_prompt_source(args) - args = _check_model_args(parser, args) - args = _check_conditional_args(parser, args) - args = _check_image_input_args(parser, args) - args = _check_load_manager_args(args) - args = _set_artifact_paths(args) - elif args.subcommand == Subcommand.COMPARE.to_lowercase(): - args = _check_compare_args(parser, args) - else: - raise ValueError(f"Unknown subcommand: {args.subcommand}") - - return args - - -### Entrypoint ### - - -def parse_args(): - argv = sys.argv - - parser = init_parsers() - passthrough_index = get_passthrough_args_index(argv) - args = parser.parse_args(argv[1:passthrough_index]) - args = refine_args(parser, args) - - return args, argv[passthrough_index + 1 :] diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/__init__.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/__init__.py deleted file mode 100755 index 086616e41..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# 
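A usage sketch of the --extra-inputs coercion implemented by get_extra_inputs_as_dict above. The genai_perf.parser import path is an assumption, and the printed result follows the bool/int/float/JSON checks shown in the deleted code.

import argparse

# Hypothetical session; assumes the genai-perf package removed by this diff is
# still installed and that get_extra_inputs_as_dict lives in genai_perf.parser.
from genai_perf.parser import get_extra_inputs_as_dict

args = argparse.Namespace(
    extra_inputs=["max_tokens:256", "ignore_eos:true", '{"temperature": 0.7}']
)
extras = get_extra_inputs_as_dict(args)
# "256" passes isdigit() and becomes an int, "true" becomes a bool, and the
# JSON-formatted string is merged into the dict as-is.
print(extras)  # {'max_tokens': 256, 'ignore_eos': True, 'temperature': 0.7}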
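A minimal sketch of the artifact-directory naming performed by _set_artifact_paths above, assuming DEFAULT_ARTIFACT_DIR resolves to "artifacts" (the constant is defined outside this hunk) and a hypothetical model name.

from pathlib import Path

# Mirrors _set_artifact_paths: HuggingFace model names containing '/' are
# flattened with '_', then the service kind/endpoint type and the load level
# are appended and the pieces are joined with '-'.
model = "meta-llama/Llama-2-7b"            # hypothetical model name
parts = ["_".join(model.split("/"))]       # 'meta-llama_Llama-2-7b'
parts.append("openai-chat")                # service kind + endpoint type
parts.append("concurrency8")               # or 'request_rate<value>'
artifact_dir = Path("artifacts") / "-".join(parts)
print(artifact_dir)  # artifacts/meta-llama_Llama-2-7b-openai-chat-concurrency8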
Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/base_plot.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/base_plot.py deleted file mode 100755 index 470e0b942..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/base_plot.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from pathlib import Path -from typing import List - -import pandas as pd -from genai_perf.exceptions import GenAIPerfException -from genai_perf.plots.plot_config import ProfileRunData -from plotly.graph_objects import Figure - - -class BasePlot: - """ - Base class for plots - """ - - def __init__(self, data: List[ProfileRunData]) -> None: - self._profile_data = data - - def create_plot( - self, - graph_title: str, - x_label: str, - y_label: str, - width: int, - height: int, - filename_root: str, - output_dir: Path, - ) -> None: - """ - Create plot for specific graph type - """ - raise NotImplementedError - - def _create_dataframe(self, x_label: str, y_label: str) -> pd.DataFrame: - return pd.DataFrame( - { - x_label: [prd.x_metric for prd in self._profile_data], - y_label: [prd.y_metric for prd in self._profile_data], - "Run Name": [prd.name for prd in self._profile_data], - } - ) - - def _generate_parquet(self, df: pd.DataFrame, output_dir: Path, file: str) -> None: - filepath = output_dir / f"{file}.gzip" - df.to_parquet(filepath, compression="gzip") - - def _generate_graph_file(self, fig: Figure, output_dir: Path, file: str) -> None: - if file.endswith("jpeg"): - filepath = output_dir / f"{file}" - fig.write_image(filepath) - elif file.endswith("html"): - filepath = output_dir / f"{file}" - fig.write_html(filepath) - else: - extension = file.split(".")[-1] - raise GenAIPerfException(f"image file type {extension} is not supported") diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/box_plot.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/box_plot.py deleted file mode 100755 index 38aad36dc..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/box_plot.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
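For reference, a small sketch of the per-run dataframe that BasePlot._create_dataframe builds and _generate_parquet serializes above; the column labels are whatever x_label/y_label the concrete plot passes in, and the values here are made up.

import pandas as pd

# One row per profile run; each metric cell holds that run's full sequence of
# values, matching BasePlot._create_dataframe.
runs = {
    "run_a": ([128, 256, 512], [31.0, 40.2, 55.9]),   # hypothetical metrics
    "run_b": ([128, 256, 512], [28.4, 37.1, 51.3]),
}
df = pd.DataFrame(
    {
        "Input Sequence Length": [x for x, _ in runs.values()],
        "Time to First Token (ms)": [y for _, y in runs.values()],
        "Run Name": list(runs.keys()),
    }
)
# As in _generate_parquet, the frame is written as a gzip-compressed parquet file.
df.to_parquet("time_to_first_token_vs_input_sequence_lengths.gzip", compression="gzip")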
- -from pathlib import Path -from typing import List - -import plotly.graph_objects as go -from genai_perf.plots.base_plot import BasePlot -from genai_perf.plots.plot_config import ProfileRunData - - -class BoxPlot(BasePlot): - """ - Generate a box plot in jpeg and html format. - """ - - def __init__(self, data: List[ProfileRunData]) -> None: - super().__init__(data) - - def create_plot( - self, - graph_title: str = "", - x_label: str = "", - y_label: str = "", - width: int = 700, - height: int = 450, - filename_root: str = "", - output_dir: Path = Path(""), - ) -> None: - fig = go.Figure() - for pd in self._profile_data: - fig.add_trace(go.Box(y=pd.y_metric, name=pd.name)) - - # Update layout and axis labels - fig.update_layout( - title={ - "text": f"{graph_title}", - "xanchor": "center", - "x": 0.5, - }, - width=width, - height=height, - ) - fig.update_traces(boxpoints="all") - fig.update_xaxes(title_text=x_label, showticklabels=False) - fig.update_yaxes(title_text=y_label) - - # Save dataframe as parquet file - df = self._create_dataframe(x_label, y_label) - self._generate_parquet(df, output_dir, filename_root) - - self._generate_graph_file(fig, output_dir, filename_root + ".html") - self._generate_graph_file(fig, output_dir, filename_root + ".jpeg") diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/heat_map.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/heat_map.py deleted file mode 100755 index 7f4dbe166..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/heat_map.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from pathlib import Path -from typing import List - -import plotly.graph_objects as go -from genai_perf.plots.base_plot import BasePlot -from genai_perf.plots.plot_config import ProfileRunData -from plotly.subplots import make_subplots - - -class HeatMap(BasePlot): - """ - Generate a heat map in jpeg and html format. 
- """ - - def __init__(self, data: List[ProfileRunData]) -> None: - super().__init__(data) - - def create_plot( - self, - graph_title: str = "", - x_label: str = "", - y_label: str = "", - width: int = 700, - height: int = 450, - filename_root: str = "", - output_dir: Path = Path(""), - ) -> None: - N = len(self._profile_data) - - if N <= 3: - n_rows, n_cols = 1, N - else: - n_rows = (N + 2) // 3 - n_cols = 3 - - fig = make_subplots( - rows=n_rows, - cols=n_cols, - x_title=x_label, - y_title=y_label, - subplot_titles=[prd.name for prd in self._profile_data], - ) - - for index, prd in enumerate(self._profile_data): - hm = go.Histogram2d( - x=prd.x_metric, - y=prd.y_metric, - coloraxis="coloraxis", - name=prd.name, - ) - - # Calculate the location where the figure should be added in the subplot - c_row = int(index / n_cols) + 1 - c_col = index % n_cols + 1 - fig.add_trace(hm, c_row, c_col) - - fig.update_layout( - title={ - "text": graph_title, - "xanchor": "center", - "x": 0.5, - }, - width=width, - height=height, - ) - - # Save dataframe as parquet file - df = self._create_dataframe(x_label, y_label) - self._generate_parquet(df, output_dir, filename_root) - - # self._generate_parquet(df, filename_root) - self._generate_graph_file(fig, output_dir, filename_root + ".html") - self._generate_graph_file(fig, output_dir, filename_root + ".jpeg") diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config.py deleted file mode 100755 index 2408d0591..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from collections.abc import Sequence -from dataclasses import dataclass -from enum import Enum, auto -from pathlib import Path -from typing import List, Sequence, Union - - -class PlotType(Enum): - SCATTER = auto() - BOX = auto() - HEATMAP = auto() - - -@dataclass -class ProfileRunData: - name: str - x_metric: Sequence[Union[int, float]] - y_metric: Sequence[Union[int, float]] - - -@dataclass -class PlotConfig: - title: str - data: List[ProfileRunData] - x_label: str - y_label: str - width: int - height: int - type: PlotType - output: Path diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config_parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config_parser.py deleted file mode 100755 index 00588f6bb..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config_parser.py +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from pathlib import Path -from typing import List, Union - -import genai_perf.logging as logging - -# Skip type checking to avoid mypy error -# Issue: https://github.com/python/mypy/issues/10632 -import yaml # type: ignore -from genai_perf.metrics import Statistics -from genai_perf.plots.plot_config import PlotConfig, PlotType, ProfileRunData -from genai_perf.profile_data_parser import LLMProfileDataParser -from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer -from genai_perf.utils import load_yaml, scale - -logger = logging.getLogger(__name__) - - -class PlotConfigParser: - """Parses YAML configuration file to generate PlotConfigs.""" - - def __init__(self, filename: Path) -> None: - self._filename = filename - - def generate_configs(self) -> List[PlotConfig]: - """Load YAML configuration file and convert to PlotConfigs.""" - logger.info( - f"Generating plot configurations by parsing {self._filename}. 
" - "This may take a few seconds.", - ) - configs = load_yaml(self._filename) - - plot_configs = [] - for _, config in configs.items(): - # Collect profile run data - profile_data: List[ProfileRunData] = [] - for filepath in config["paths"]: - stats = self._get_statistics(filepath) - profile_data.append( - ProfileRunData( - name=self._get_run_name(Path(filepath)), - x_metric=self._get_metric(stats, config["x_metric"]), - y_metric=self._get_metric(stats, config["y_metric"]), - ) - ) - - plot_configs.append( - PlotConfig( - title=config["title"], - data=profile_data, - x_label=config["x_label"], - y_label=config["y_label"], - width=config["width"], - height=config["height"], - type=self._get_plot_type(config["type"]), - output=Path(config["output"]), - ) - ) - - return plot_configs - - def _get_statistics(self, filepath: str) -> Statistics: - """Extract a single profile run data.""" - data_parser = LLMProfileDataParser( - filename=Path(filepath), - tokenizer=get_tokenizer(DEFAULT_TOKENIZER), - ) - load_info = data_parser.get_profile_load_info() - - # TMA-1904: Remove single experiment assumption - assert len(load_info) == 1 - infer_mode, load_level = load_info[0] - stats = data_parser.get_statistics(infer_mode, load_level) - return stats - - def _get_run_name(self, filepath: Path) -> str: - """Construct a profile run name.""" - if filepath.parent.name: - return filepath.parent.name + "/" + filepath.stem - return filepath.stem - - def _get_metric(self, stats: Statistics, name: str) -> List[Union[int, float]]: - if not name: # no metric - return [] - elif name == "inter_token_latencies": - itls = stats.metrics.data[name] - return [scale(x, (1 / 1e6)) for x in itls] # ns to ms - elif name == "token_positions": - chunked_itls = getattr(stats.metrics, "_chunked_inter_token_latencies") - token_positions: List[Union[int, float]] = [] - for request_itls in chunked_itls: - token_positions += list(range(1, len(request_itls) + 1)) - return token_positions - elif name == "time_to_first_tokens": - ttfts = stats.metrics.data[name] - return [scale(x, (1 / 1e6)) for x in ttfts] # ns to ms - elif name == "request_latencies": - req_latencies = stats.metrics.data[name] - return [scale(x, (1 / 1e6)) for x in req_latencies] # ns to ms - - return stats.metrics.data[name] - - def _get_plot_type(self, plot_type: str) -> PlotType: - """Returns the plot type as PlotType object.""" - if plot_type == "scatter": - return PlotType.SCATTER - elif plot_type == "box": - return PlotType.BOX - elif plot_type == "heatmap": - return PlotType.HEATMAP - else: - raise ValueError( - "Unknown plot type encountered while parsing YAML configuration. " - "Plot type must be either 'scatter', 'box', or 'heatmap'." 
- ) - - @staticmethod - def create_init_yaml_config(filenames: List[Path], output_dir: Path) -> None: - config_str = f""" - plot1: - title: Time to First Token - x_metric: "" - y_metric: time_to_first_tokens - x_label: Time to First Token (ms) - y_label: "" - width: {1200 if len(filenames) > 1 else 700} - height: 450 - type: box - paths: {[str(f) for f in filenames]} - output: {output_dir} - - plot2: - title: Request Latency - x_metric: "" - y_metric: request_latencies - x_label: Request Latency (ms) - y_label: "" - width: {1200 if len(filenames) > 1 else 700} - height: 450 - type: box - paths: {[str(f) for f in filenames]} - output: {output_dir} - - plot3: - title: Distribution of Input Sequence Lengths to Output Sequence Lengths - x_metric: input_sequence_lengths - y_metric: output_sequence_lengths - x_label: Input Sequence Length - y_label: Output Sequence Length - width: {1200 if len(filenames) > 1 else 700} - height: 450 - type: heatmap - paths: {[str(f) for f in filenames]} - output: {output_dir} - - plot4: - title: Time to First Token vs Input Sequence Lengths - x_metric: input_sequence_lengths - y_metric: time_to_first_tokens - x_label: Input Sequence Length - y_label: Time to First Token (ms) - width: {1200 if len(filenames) > 1 else 700} - height: 450 - type: scatter - paths: {[str(f) for f in filenames]} - output: {output_dir} - - plot5: - title: Token-to-Token Latency vs Output Token Position - x_metric: token_positions - y_metric: inter_token_latencies - x_label: Output Token Position - y_label: Token-to-Token Latency (ms) - width: {1200 if len(filenames) > 1 else 700} - height: 450 - type: scatter - paths: {[str(f) for f in filenames]} - output: {output_dir} - """ - - filepath = output_dir / "config.yaml" - logger.info(f"Creating initial YAML configuration file to {filepath}") - config = yaml.safe_load(config_str) - with open(str(filepath), "w") as f: - yaml.dump(config, f, sort_keys=False) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_manager.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_manager.py deleted file mode 100755 index e548a7de7..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_manager.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from typing import List - -import genai_perf.logging as logging -from genai_perf.plots.box_plot import BoxPlot -from genai_perf.plots.heat_map import HeatMap -from genai_perf.plots.plot_config import PlotConfig, PlotType -from genai_perf.plots.scatter_plot import ScatterPlot - -logger = logging.getLogger(__name__) - - -class PlotManager: - """ - Manage details around plots generated - """ - - def __init__(self, plot_configs: List[PlotConfig]) -> None: - self._plot_configs = plot_configs - - def _generate_filename(self, title: str) -> str: - filename = "_".join(title.lower().split()) - return filename - - def generate_plots(self) -> None: - for plot_config in self._plot_configs: - logger.info(f"Generating '{plot_config.title}' plot") - if plot_config.type == PlotType.BOX: - bp = BoxPlot(plot_config.data) - bp.create_plot( - graph_title=plot_config.title, - x_label=plot_config.x_label, - width=plot_config.width, - height=plot_config.height, - filename_root=self._generate_filename(plot_config.title), - output_dir=plot_config.output, - ) - - elif plot_config.type == PlotType.HEATMAP: - hm = HeatMap(plot_config.data) - hm.create_plot( - graph_title=plot_config.title, - x_label=plot_config.x_label, - y_label=plot_config.y_label, - width=plot_config.width, - height=plot_config.height, - filename_root=self._generate_filename(plot_config.title), - output_dir=plot_config.output, - ) - - elif plot_config.type == PlotType.SCATTER: - sp = ScatterPlot(plot_config.data) - sp.create_plot( - graph_title=plot_config.title, - x_label=plot_config.x_label, - y_label=plot_config.y_label, - width=plot_config.width, - height=plot_config.height, - filename_root=self._generate_filename(plot_config.title), - output_dir=plot_config.output, - ) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/scatter_plot.py b/src/c++/perf_analyzer/genai-perf/genai_perf/plots/scatter_plot.py deleted file mode 100755 index 35dca8fc3..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/plots/scatter_plot.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from pathlib import Path -from typing import List - -import plotly.graph_objects as go -from genai_perf.plots.base_plot import BasePlot -from genai_perf.plots.plot_config import ProfileRunData - - -class ScatterPlot(BasePlot): - """ - Generate a scatter plot in jpeg and html format. - """ - - def __init__(self, data: List[ProfileRunData]) -> None: - super().__init__(data) - - def create_plot( - self, - graph_title: str = "", - x_label: str = "", - y_label: str = "", - width: int = 700, - height: int = 450, - filename_root: str = "", - output_dir: Path = Path(""), - ) -> None: - fig = go.Figure() - for pd in self._profile_data: - fig.add_trace( - go.Scatter( - x=pd.x_metric, - y=pd.y_metric, - mode="markers", - name=pd.name, - ) - ) - - fig.update_layout( - title={ - "text": f"{graph_title}", - "xanchor": "center", - "x": 0.5, - }, - width=width, - height=height, - ) - fig.update_xaxes(title_text=f"{x_label}") - fig.update_yaxes(title_text=f"{y_label}") - - # Save dataframe as parquet file - df = self._create_dataframe(x_label, y_label) - self._generate_parquet(df, output_dir, filename_root) - - self._generate_graph_file(fig, output_dir, filename_root + ".html") - self._generate_graph_file(fig, output_dir, filename_root + ".jpeg") diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/__init__.py b/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/__init__.py deleted file mode 100644 index 2e7798c40..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from genai_perf.profile_data_parser.llm_profile_data_parser import LLMProfileDataParser -from genai_perf.profile_data_parser.profile_data_parser import ( - ProfileDataParser, - ResponseFormat, -) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py deleted file mode 100755 index 183f21fd2..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py +++ /dev/null @@ -1,299 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json -from itertools import tee -from pathlib import Path -from typing import Dict, List, Tuple - -from genai_perf.metrics import LLMMetrics, Metrics -from genai_perf.profile_data_parser.profile_data_parser import ( - ProfileDataParser, - ResponseFormat, -) -from genai_perf.tokenizer import Tokenizer -from genai_perf.utils import load_json_str, remove_sse_prefix - - -class LLMProfileDataParser(ProfileDataParser): - """A class that calculates and aggregates all the LLM performance statistics - across the Perf Analyzer profile results. - - The LLMProfileDataParser class parses profile export JSON file, collects the - core LLM performance metrics, and calculates summary statistics for each - different Perf Analyzer runs/experiments. - - Example: - - >>> ... 
# run Perf Analyzer with concurrency level 10 - >>> - >>> from transformers import AutoTokenizer - >>> - >>> tokenizer = AutoTokenizer.from_pretrained("gpt2") - >>> pd = LLMProfileDataParser( - >>> filename="profile_export.json", - >>> tokenizer=tokenizer, - >>> ) - >>> stats = pd.get_statistics(infer_mode="concurrency", level=10) - >>> - >>> print(stats) # output: Statistics(avg_time_to_first_token=...) - >>> stats.pretty_print() # Output: time_to_first_token_s: ... - """ - - def __init__( - self, - filename: Path, - tokenizer: Tokenizer, - ) -> None: - self._tokenizer = tokenizer - super().__init__(filename) - - def _parse_requests(self, requests: dict) -> Metrics: - """Parse each requests in profile export data to extract key metrics.""" - min_req_timestamp, max_res_timestamp = float("inf"), 0 - request_latencies = [] - time_to_first_tokens = [] - inter_token_latencies = [] - output_token_throughputs_per_request = [] - input_sequence_lengths = [] - output_sequence_lengths = [] - chunked_inter_token_latencies = [] - - for request in requests: - req_timestamp = request["timestamp"] - req_inputs = request["request_inputs"] - res_timestamps = request["response_timestamps"] - res_outputs = request["response_outputs"] - - self._preprocess_response(res_timestamps, res_outputs) - - # Skip requests with empty response. This happens sometimes when the - # model returns a single response with empty string. - if not res_timestamps: - continue - - # track entire benchmark duration - min_req_timestamp = min(min_req_timestamp, req_timestamp) - max_res_timestamp = max(max_res_timestamp, res_timestamps[-1]) - - # request latencies - req_latency_ns = res_timestamps[-1] - req_timestamp - request_latencies.append(req_latency_ns) # nanosec - req_latency_s = req_latency_ns / 1e9 # sec - - # time to first token - ttft = res_timestamps[0] - req_timestamp - time_to_first_tokens.append(ttft) - - # number of input tokens - input_seq_len = self._get_input_token_count(req_inputs) - input_sequence_lengths.append(input_seq_len) - - # output token throughput per request - output_token_counts, total_output_token = self._get_output_token_counts( - res_outputs - ) - output_token_throughputs_per_request.append( - total_output_token / req_latency_s - ) - output_sequence_lengths.append(total_output_token) - - # inter token latencies - if total_output_token > 1: - inter_token_latency = (req_latency_ns - ttft) / (total_output_token - 1) - inter_token_latencies.append(round(inter_token_latency)) - - # The new ITL calculation above loses all token-level ITL information - # and as a result breaks ITL vs token position visualization. Keep - # the old version of inter token latency as a WAR to preserve the - # visualization. - chunked_inter_token_latency = [] - for (t1, _), (t2, n2) in self._pairwise( - zip(res_timestamps, output_token_counts) - ): - # TMA-1676: handle empty first/last responses - # if the latter response has zero token (e.g. empty string), - # then set it default to one for the sake of inter token latency - # calculation and to avoid divide by zero. 
- num_token = 1 if n2 == 0 else n2 - chunked_inter_token_latency.append(round((t2 - t1) / num_token)) - chunked_inter_token_latencies.append(chunked_inter_token_latency) - - # request & output token throughput - benchmark_duration = (max_res_timestamp - min_req_timestamp) / 1e9 # nanosec - request_throughputs = [len(requests) / benchmark_duration] - output_token_throughputs = [sum(output_sequence_lengths) / benchmark_duration] - - return LLMMetrics( - request_throughputs, - request_latencies, - time_to_first_tokens, - inter_token_latencies, - output_token_throughputs, - output_token_throughputs_per_request, - output_sequence_lengths, - input_sequence_lengths, - chunked_inter_token_latencies, - ) - - def _pairwise(self, iterable): - """Generate pairs of consecutive elements from the given iterable.""" - a, b = tee(iterable) - next(b, None) - return zip(a, b) - - def _preprocess_response( - self, res_timestamps: List[int], res_outputs: List[Dict[str, str]] - ) -> None: - """Helper function to preprocess responses of a request.""" - if self._service_kind == "openai": - # PA sometimes receives multiple SSE responses at once (as a single - # response). Handle these responses by merging into a single response. - for i in range(len(res_outputs)): - response = res_outputs[i]["response"] - responses = response.strip().split("\n\n") - if len(responses) > 1: - merged_response = load_json_str(remove_sse_prefix(responses[0])) - if ( - merged_response["choices"][0]["delta"].get("content", None) - is None - ): - merged_response["choices"][0]["delta"]["content"] = "" - for r in responses[1:]: - text = self._extract_openai_text_output(r) - merged_response["choices"][0]["delta"]["content"] += text - - res_outputs[i] = {"response": json.dumps(merged_response)} - - # Remove responses without any content - indices_to_remove = [] - for idx, out in enumerate(res_outputs): - if self._is_openai_empty_response(out["response"]): - indices_to_remove.append(idx) - indices_to_remove.sort(reverse=True) - for index in indices_to_remove: - res_timestamps.pop(index) - res_outputs.pop(index) - - def _get_input_token_count(self, req_inputs: dict) -> int: - """Deserialize the request input and return tokenized inputs.""" - if self._service_kind == "triton": - input_text = req_inputs["text_input"] - elif self._service_kind == "openai": - input_text = self._get_openai_input_text(req_inputs) - else: - raise ValueError(f"Unknown service kind: '{self._service_kind}'.") - - return len(self._tokenizer.encode(input_text)) - - def _get_openai_input_text(self, req_inputs: dict) -> str: - """Tokenize the OpenAI request input texts.""" - payload = load_json_str(req_inputs["payload"]) - if self._response_format == ResponseFormat.OPENAI_CHAT_COMPLETIONS: - return payload["messages"][0]["content"] - elif self._response_format == ResponseFormat.OPENAI_COMPLETIONS: - return payload["prompt"] - elif self._response_format == ResponseFormat.OPENAI_VISION: - content = payload["messages"][0]["content"] - return " ".join(c["text"] for c in content if c["type"] == "text") - else: - raise ValueError( - "Failed to parse OpenAI request input in profile export file." 
- ) - - def _get_output_token_counts( - self, res_outputs: List[Dict] - ) -> Tuple[List[int], int]: - """Return response-level token counts and total token count.""" - if self._service_kind == "triton": - output_texts = self._get_triton_output_tokens(res_outputs) - elif self._service_kind == "openai": - output_texts = self._get_openai_output_tokens(res_outputs) - else: - raise ValueError(f"Unknown service kind: '{self._service_kind}'.") - - full_text_token_count = len(self._tokenizer.encode("".join(output_texts))) - - output_tokens = self._get_response_output_tokens(output_texts) - output_token_counts = list(map(len, output_tokens)) - return output_token_counts, full_text_token_count - - def _get_triton_output_tokens(self, res_outputs: List[Dict]) -> List[str]: - """Return a list of Triton response texts.""" - return [r["text_output"] for r in res_outputs] - - def _get_openai_output_tokens(self, res_outputs: List[Dict]) -> List[str]: - """Return a list of OpenAI response texts.""" - output_texts = [] - for output in res_outputs: - text = self._extract_openai_text_output(output["response"]) - output_texts.append(text) - return output_texts - - def _get_response_output_tokens(self, output_texts: List[str]) -> List[List[int]]: - """Return a list of response output tokens.""" - # Exclamation mark trick forces the llama tokenization to consistently - # start each output with a specific token which allows us to safely skip - # the first token of every tokenized output and get only the ones that - # are returned by the model - encodings = self._tokenizer(["!" + txt for txt in output_texts]) - return [out[1:] for out in encodings.data["input_ids"]] - - def _extract_openai_text_output(self, response: str) -> str: - """Extracts text/content of the OpenAI response object.""" - response = remove_sse_prefix(response) - - if response == "[DONE]": - return "" - - data = load_json_str(response) - completions = data["choices"][0] - - text_output = "" - if "object" not in data: - # FIXME: TPA-47 workaround for vLLM not following OpenAI Completions - # API specification when streaming, missing 'object' field: - # https://platform.openai.com/docs/api-reference/completions - text_output = completions.get("text", "") - elif data["object"] == "text_completion": # legacy - text_output = completions.get("text", "") - elif data["object"] == "chat.completion": # non-streaming - text_output = completions["message"].get("content", "") - elif data["object"] == "chat.completion.chunk": # streaming - text_output = completions["delta"].get("content", "") - else: - obj_type = data["object"] - raise ValueError(f"Unknown OpenAI response object type '{obj_type}'.") - return text_output - - def _is_openai_empty_response(self, response: str) -> bool: - """Returns true if the response is an openai response with no content (or empty content)""" - text = self._extract_openai_text_output(response) - if text: - return False - return True diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py deleted file mode 100755 index 74eb48a23..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
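A minimal sketch of the leading-'!' trick used by _get_response_output_tokens above to count tokens per streamed response; it assumes the llama tokenizer that this diff uses as the default elsewhere, and downloading it requires network access.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
responses = ["The quick", " brown", " fox"]   # hypothetical streamed text chunks
# Prepending "!" pins the first token of every chunk to the same id, so the
# tokens actually produced by the model are everything after position 0.
encodings = tok(["!" + text for text in responses], add_special_tokens=False)
per_response_ids = [ids[1:] for ids in encodings["input_ids"]]
print([len(ids) for ids in per_response_ids])  # per-response output token counts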
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from enum import Enum, auto -from pathlib import Path -from typing import List, Tuple - -from genai_perf.metrics import Metrics, Statistics -from genai_perf.utils import load_json - - -class ResponseFormat(Enum): - HUGGINGFACE_RANKINGS = auto() - OPENAI_CHAT_COMPLETIONS = auto() - OPENAI_COMPLETIONS = auto() - OPENAI_EMBEDDINGS = auto() - OPENAI_VISION = auto() - RANKINGS = auto() - TRITON = auto() - - -class ProfileDataParser: - """Base profile data parser class that reads the profile data JSON file to - extract core metrics and calculate various performance statistics. - """ - - def __init__(self, filename: Path) -> None: - data = load_json(filename) - self._get_profile_metadata(data) - self._parse_profile_data(data) - - def _get_profile_metadata(self, data: dict) -> None: - self._service_kind = data["service_kind"] - if self._service_kind == "openai": - if data["endpoint"] == "rerank": - self._response_format = ResponseFormat.HUGGINGFACE_RANKINGS - elif data["endpoint"] == "v1/chat/completions": - # (TPA-66) add PA metadata to deduce the response format instead - # of parsing the request input payload in profile export json - # file. - request = data["experiments"][0]["requests"][0] - request_input = request["request_inputs"]["payload"] - if "image_url" in request_input: - self._response_format = ResponseFormat.OPENAI_VISION - else: - self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS - elif data["endpoint"] == "v1/completions": - self._response_format = ResponseFormat.OPENAI_COMPLETIONS - elif data["endpoint"] == "v1/embeddings": - self._response_format = ResponseFormat.OPENAI_EMBEDDINGS - elif data["endpoint"] == "v1/ranking": - self._response_format = ResponseFormat.RANKINGS - else: - # (TPA-66) add PA metadata to handle this case - # When endpoint field is either empty or custom endpoint, fall - # back to parsing the response to extract the response format. 
- request = data["experiments"][0]["requests"][0] - request_input = request["request_inputs"]["payload"] - response = request["response_outputs"][0]["response"] - if "chat.completion" in response: - if "image_url" in request_input: - self._response_format = ResponseFormat.OPENAI_VISION - else: - self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS - elif "text_completion" in response: - self._response_format = ResponseFormat.OPENAI_COMPLETIONS - elif "embedding" in response: - self._response_format = ResponseFormat.OPENAI_EMBEDDINGS - elif "ranking" in response: - self._response_format = ResponseFormat.RANKINGS - else: - raise RuntimeError("Unknown OpenAI response format.") - - elif self._service_kind == "triton": - self._response_format = ResponseFormat.TRITON - else: - raise ValueError(f"Unknown service kind: {self._service_kind}") - - def _parse_profile_data(self, data: dict) -> None: - """Parse through the entire profile data to collect statistics.""" - self._profile_results = {} - for experiment in data["experiments"]: - infer_mode = experiment["experiment"]["mode"] - load_level = experiment["experiment"]["value"] - requests = experiment["requests"] - - metrics = self._parse_requests(requests) - - # aggregate and calculate statistics - statistics = Statistics(metrics) - self._profile_results[(infer_mode, str(load_level))] = statistics - - def _parse_requests(self, requests: dict) -> Metrics: - """Parse each request in profile data to extract core metrics.""" - min_req_timestamp, max_res_timestamp = float("inf"), 0 - request_latencies = [] - - for request in requests: - req_timestamp = request["timestamp"] - res_timestamps = request["response_timestamps"] - - # track entire benchmark duration - min_req_timestamp = min(min_req_timestamp, req_timestamp) - max_res_timestamp = max(max_res_timestamp, res_timestamps[-1]) - - # request latencies - req_latency = res_timestamps[-1] - req_timestamp - request_latencies.append(req_latency) - - # request throughput - benchmark_duration = (max_res_timestamp - min_req_timestamp) / 1e9 # to seconds - request_throughputs = [len(requests) / benchmark_duration] - - return Metrics( - request_throughputs, - request_latencies, - ) - - def get_statistics(self, infer_mode: str, load_level: str) -> Statistics: - """Return profile statistics if it exists.""" - if (infer_mode, load_level) not in self._profile_results: - raise KeyError(f"Profile with {infer_mode}={load_level} does not exist.") - return self._profile_results[(infer_mode, load_level)] - - def get_profile_load_info(self) -> List[Tuple[str, str]]: - """Return available (infer_mode, load_level) tuple keys.""" - return [k for k, _ in self._profile_results.items()] diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py b/src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py deleted file mode 100644 index 052a478e5..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import contextlib -import io -from typing import List - -from genai_perf.exceptions import GenAIPerfException - -# Silence tokenizer warning on import -with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr( - io.StringIO() -) as stderr: - from transformers import AutoTokenizer, BatchEncoding - from transformers import logging as token_logger - - token_logger.set_verbosity_error() - -DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer" - - -class Tokenizer: - """ - A small wrapper class around Huggingface Tokenizer - """ - - def __init__(self, name: str) -> None: - """ - Initialize by downloading the tokenizer from Huggingface.co - """ - try: - # Silence tokenizer warning on first use - with contextlib.redirect_stdout( - io.StringIO() - ) as stdout, contextlib.redirect_stderr(io.StringIO()) as stderr: - tokenizer = AutoTokenizer.from_pretrained(name) - except Exception as e: - raise GenAIPerfException(e) - - self._tokenizer = tokenizer - - # default tokenizer parameters for __call__, encode, decode methods - self._call_args = {"add_special_tokens": False} - self._encode_args = {"add_special_tokens": False} - self._decode_args = {"skip_special_tokens": True} - - def __call__(self, text, **kwargs) -> BatchEncoding: - self._call_args.update(kwargs) - return self._tokenizer(text, **self._call_args) - - def encode(self, text, **kwargs) -> List[int]: - self._encode_args.update(kwargs) - return self._tokenizer.encode(text, **self._encode_args) - - def decode(self, token_ids, **kwargs) -> str: - self._decode_args.update(kwargs) - return self._tokenizer.decode(token_ids, **self._decode_args) - - def __repr__(self) -> str: - return self._tokenizer.__repr__() - - -def get_tokenizer(tokenizer_model: str) -> Tokenizer: - """ - Return tokenizer for the given model name - """ - return Tokenizer(tokenizer_model) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/utils.py b/src/c++/perf_analyzer/genai-perf/genai_perf/utils.py deleted file mode 100644 index 4b625352a..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/utils.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json -from enum import Enum -from pathlib import Path -from typing import Any, Dict, List, Optional, Type - -import genai_perf.logging as logging - -# Skip type checking to avoid mypy error -# Issue: https://github.com/python/mypy/issues/10632 -import yaml # type: ignore -from PIL import Image - -logger = logging.getLogger(__name__) - - -def encode_image(img: Image, format: str): - """Encodes an image into base64 encoding.""" - # Lazy import for vision related endpoints - import base64 - from io import BytesIO - - # JPEG does not support P or RGBA mode (commonly used for PNG) so it needs - # to be converted to RGB before an image can be saved as JPEG format. - if format == "JPEG" and img.mode != "RGB": - img = img.convert("RGB") - - buffered = BytesIO() - img.save(buffered, format=format) - return base64.b64encode(buffered.getvalue()).decode("utf-8") - - -def remove_sse_prefix(msg: str) -> str: - prefix = "data: " - if msg.startswith(prefix): - return msg[len(prefix) :].strip() - return msg.strip() - - -def load_yaml(filepath: Path) -> Dict[str, Any]: - with open(str(filepath)) as f: - configs = yaml.safe_load(f) - return configs - - -def load_json(filepath: Path) -> Dict[str, Any]: - with open(str(filepath), encoding="utf-8", errors="ignore") as f: - content = f.read() - return load_json_str(content) - - -def load_json_str(json_str: str) -> Dict[str, Any]: - try: - return json.loads(json_str) - except json.JSONDecodeError: - snippet = json_str[:200] + ("..." if len(json_str) > 200 else "") - logger.error("Failed to parse JSON string: '%s'", snippet) - raise - - -def remove_file(file: Path) -> None: - if file.is_file(): - file.unlink() - - -def convert_option_name(name: str) -> str: - return name.replace("_", "-") - - -def get_enum_names(enum: Type[Enum]) -> List: - names = [] - for e in enum: - names.append(e.name.lower()) - return names - - -def get_enum_entry(name: str, enum: Type[Enum]) -> Optional[Enum]: - for e in enum: - if e.name.lower() == name.lower(): - return e - return None - - -def scale(value, factor): - return value * factor diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py deleted file mode 100644 index 76ef3e321..000000000 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import subprocess -from argparse import Namespace -from typing import List, Optional - -import genai_perf.logging as logging -import genai_perf.utils as utils -from genai_perf.constants import DEFAULT_GRPC_URL, DEFAULT_INPUT_DATA_JSON -from genai_perf.llm_inputs.llm_inputs import OutputFormat - -logger = logging.getLogger(__name__) - - -class Profiler: - @staticmethod - def add_protocol_args(args: Namespace) -> List[str]: - cmd = [] - if args.service_kind == "triton": - cmd += ["-i", "grpc", "--streaming"] - if args.u is None: # url - cmd += ["-u", f"{DEFAULT_GRPC_URL}"] - if args.output_format == OutputFormat.TENSORRTLLM: - cmd += ["--shape", "max_tokens:1", "--shape", "text_input:1"] - elif args.service_kind == "openai": - cmd += ["-i", "http"] - return cmd - - @staticmethod - def add_inference_load_args(args: Namespace) -> List[str]: - cmd = [] - if args.concurrency: - cmd += ["--concurrency-range", f"{args.concurrency}"] - elif args.request_rate: - cmd += ["--request-rate-range", f"{args.request_rate}"] - return cmd - - @staticmethod - def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[str]: - skip_args = [ - "artifact_dir", - "backend", - "batch_size", - "concurrency", - "endpoint_type", - "extra_inputs", - "formatted_model_name", - "func", - "generate_plots", - "input_dataset", - "input_file", - "input_format", - "model", - "model_selection_strategy", - "num_prompts", - "output_format", - "output_tokens_mean_deterministic", - "output_tokens_mean", - "output_tokens_stddev", - "prompt_source", - "random_seed", - "request_rate", - # The 'streaming' passed in to this script is to determine if the - # LLM response should be streaming. That is different than the - # 'streaming' that PA takes, which means something else (and is - # required for decoupled models into triton). 
- "streaming", - "synthetic_input_tokens_mean", - "synthetic_input_tokens_stddev", - "subcommand", - "tokenizer", - "image_width_mean", - "image_width_stddev", - "image_height_mean", - "image_height_stddev", - "image_format", - ] - - utils.remove_file(args.profile_export_file) - - cmd = [ - f"perf_analyzer", - f"-m", - f"{args.formatted_model_name}", - f"--async", - f"--input-data", - f"{args.artifact_dir / DEFAULT_INPUT_DATA_JSON}", - ] - for arg, value in vars(args).items(): - if arg in skip_args: - pass - elif value is None: - pass - elif value is False: - pass - elif value is True: - if len(arg) == 1: - cmd += [f"-{arg}"] - else: - cmd += [f"--{arg}"] - else: - if len(arg) == 1: - cmd += [f"-{arg}", f"{value}"] - else: - arg = utils.convert_option_name(arg) - cmd += [f"--{arg}", f"{value}"] - - cmd += Profiler.add_protocol_args(args) - cmd += Profiler.add_inference_load_args(args) - - if extra_args is not None: - for arg in extra_args: - cmd += [f"{arg}"] - return cmd - - @staticmethod - def run(args: Namespace, extra_args: Optional[List[str]]) -> None: - cmd = Profiler.build_cmd(args, extra_args) - logger.info(f"Running Perf Analyzer : '{' '.join(cmd)}'") - if args and args.verbose: - subprocess.run(cmd, check=True, stdout=None) - else: - subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL) diff --git a/src/c++/perf_analyzer/genai-perf/pyproject.toml b/src/c++/perf_analyzer/genai-perf/pyproject.toml deleted file mode 100644 index f1f78a7e2..000000000 --- a/src/c++/perf_analyzer/genai-perf/pyproject.toml +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -[project] -name = "genai-perf" -readme = "README.md" -description = "GenAI Perf Analyzer CLI - CLI tool to simplify profiling LLMs and Generative AI models with Perf Analyzer" -dynamic = ["version"] -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Science/Research", - "Intended Audience :: Developers", - "Topic :: Software Development", - "Topic :: Scientific/Engineering", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.10", - "Operating System :: Unix", -] -authors = [] -maintainers = [] -keywords = [] -requires-python = ">=3.8,<4" -dependencies = [ - "numpy<2", - "pytest", - "rich", - "transformers", - "plotly", - "pandas", - "kaleido", - "statsmodels", - "pyarrow", - "fastparquet", - "pytest-mock", - "pyyaml", - "responses", - "pillow", -] - -# CLI Entrypoint -[project.scripts] -genai-perf = "genai_perf.main:main" - -[project.urls] -"Homepage" = "https://github.com/triton-inference-server/client" -"Bug Tracker" = "https://github.com/triton-inference-server/client/issues" - -# Build -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.hatch.version] -path = "genai_perf/__init__.py" - -# Pre-commit hook tool configs -[tool.codespell] -# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - -# this is only to allow you to run codespell interactively -skip = "./.git,./.github" -# ignore short words, and typename parameters like OffsetT -ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" -# use the 'clear' dictionary for unambiguous spelling mistakes -builtin = "clear" -# disable warnings about binary files and wrong encoding -quiet-level = 3 - -# Linting/formatting -[tool.ruff] -# Same as Black. -line-length = 88 -indent-width = 4 diff --git a/src/c++/perf_analyzer/genai-perf/tests/__init__.py b/src/c++/perf_analyzer/genai-perf/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_artifacts.py b/src/c++/perf_analyzer/genai-perf/tests/test_artifacts.py deleted file mode 100644 index cdcc4afc9..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_artifacts.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from argparse import Namespace -from pathlib import Path - -import pytest -from genai_perf.main import create_artifacts_dirs - - -@pytest.fixture -def mock_makedirs(mocker): - return mocker.patch("os.makedirs") - - -def test_create_artifacts_dirs_custom_path(mock_makedirs): - artifacts_dir_path = "/genai_perf_artifacts" - mock_args = Namespace(artifact_dir=Path(artifacts_dir_path), generate_plots=True) - create_artifacts_dirs(mock_args) - mock_makedirs.assert_any_call( - Path(artifacts_dir_path), exist_ok=True - ), f"Expected os.makedirs to create artifacts directory inside {artifacts_dir_path} path." - mock_makedirs.assert_any_call( - Path(artifacts_dir_path) / "plots", exist_ok=True - ), f"Expected os.makedirs to create plots directory inside {artifacts_dir_path}/plots path." - assert mock_makedirs.call_count == 2 - - -def test_create_artifacts_disable_generate_plots(mock_makedirs): - artifacts_dir_path = "/genai_perf_artifacts" - mock_args = Namespace(artifact_dir=Path(artifacts_dir_path)) - create_artifacts_dirs(mock_args) - mock_makedirs.assert_any_call( - Path(artifacts_dir_path), exist_ok=True - ), f"Expected os.makedirs to create artifacts directory inside {artifacts_dir_path} path." - assert mock_makedirs.call_count == 1 diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py deleted file mode 100644 index 2ef5d52ba..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ /dev/null @@ -1,855 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import argparse -from pathlib import Path - -import genai_perf.logging as logging -import pytest -from genai_perf import __version__, parser -from genai_perf.llm_inputs.llm_inputs import ( - ImageFormat, - ModelSelectionStrategy, - OutputFormat, - PromptSource, -) -from genai_perf.llm_inputs.synthetic_image_generator import ImageFormat -from genai_perf.parser import PathType - - -class TestCLIArguments: - # ================================================ - # PROFILE COMMAND - # ================================================ - expected_help_output = ( - "CLI to profile LLMs and Generative AI models with Perf Analyzer" - ) - expected_version_output = f"genai-perf {__version__}" - - @pytest.mark.parametrize( - "args, expected_output", - [ - (["-h"], expected_help_output), - (["--help"], expected_help_output), - (["--version"], expected_version_output), - ], - ) - def test_help_version_arguments_output_and_exit( - self, monkeypatch, args, expected_output, capsys - ): - monkeypatch.setattr("sys.argv", ["genai-perf"] + args) - - with pytest.raises(SystemExit) as excinfo: - _ = parser.parse_args() - - # Check that the exit was successful - assert excinfo.value.code == 0 - - # Capture that the correct message was displayed - captured = capsys.readouterr() - assert expected_output in captured.out - - @pytest.mark.parametrize( - "arg, expected_attributes", - [ - ( - ["--artifact-dir", "test_artifact_dir"], - {"artifact_dir": Path("test_artifact_dir")}, - ), - ( - [ - "--batch-size", - "5", - "--endpoint-type", - "embeddings", - "--service-kind", - "openai", - ], - {"batch_size": 5}, - ), - ( - [ - "-b", - "5", - "--endpoint-type", - "embeddings", - "--service-kind", - "openai", - ], - {"batch_size": 5}, - ), - (["--concurrency", "3"], {"concurrency": 3}), - ( - ["--endpoint-type", "completions", "--service-kind", "openai"], - {"endpoint": "v1/completions"}, - ), - ( - ["--endpoint-type", "chat", "--service-kind", "openai"], - {"endpoint": "v1/chat/completions"}, - ), - ( - ["--endpoint-type", "rankings", "--service-kind", "openai"], - {"endpoint": "v1/ranking"}, - ), - ( - [ - "--endpoint-type", - "chat", - "--service-kind", - "openai", - "--endpoint", - "custom/address", - ], - {"endpoint": "custom/address"}, - ), - ( - [ - "--endpoint-type", - "chat", - "--service-kind", - "openai", - "--endpoint", - " /custom/address", - ], - {"endpoint": "custom/address"}, - ), - ( - [ - "--endpoint-type", - "completions", - "--service-kind", - "openai", - "--endpoint", - "custom/address", - ], - {"endpoint": "custom/address"}, - ), - ( - ["--extra-inputs", "test_key:test_value"], - {"extra_inputs": ["test_key:test_value"]}, - ), - ( - [ - "--extra-inputs", - "test_key:5", - "--extra-inputs", - "another_test_key:6", - ], - {"extra_inputs": ["test_key:5", "another_test_key:6"]}, - ), - ( - [ - "--extra-inputs", - '{"name": "Wolverine","hobbies": ["hacking", "slashing"],"address": {"street": "1407 Graymalkin Lane, Salem Center","city": "NY"}}', - ], - { - "extra_inputs": [ - 
'{"name": "Wolverine","hobbies": ["hacking", "slashing"],"address": {"street": "1407 Graymalkin Lane, Salem Center","city": "NY"}}' - ] - }, - ), - (["--input-dataset", "openorca"], {"input_dataset": "openorca"}), - (["--measurement-interval", "100"], {"measurement_interval": 100}), - ( - ["--model-selection-strategy", "random"], - {"model_selection_strategy": ModelSelectionStrategy.RANDOM}, - ), - (["--num-prompts", "101"], {"num_prompts": 101}), - ( - ["--output-tokens-mean", "6"], - {"output_tokens_mean": 6}, - ), - ( - ["--output-tokens-mean", "6", "--output-tokens-stddev", "7"], - {"output_tokens_stddev": 7}, - ), - ( - ["--output-tokens-mean", "6", "--output-tokens-mean-deterministic"], - {"output_tokens_mean_deterministic": True}, - ), - (["-p", "100"], {"measurement_interval": 100}), - ( - ["--profile-export-file", "test.json"], - { - "profile_export_file": Path( - "artifacts/test_model-triton-tensorrtllm-concurrency1/test.json" - ) - }, - ), - (["--random-seed", "8"], {"random_seed": 8}), - (["--request-rate", "9.0"], {"request_rate": 9.0}), - (["-s", "99.5"], {"stability_percentage": 99.5}), - (["--service-kind", "triton"], {"service_kind": "triton"}), - ( - ["--service-kind", "openai", "--endpoint-type", "chat"], - {"service_kind": "openai", "endpoint": "v1/chat/completions"}, - ), - (["--stability-percentage", "99.5"], {"stability_percentage": 99.5}), - (["--streaming"], {"streaming": True}), - ( - ["--synthetic-input-tokens-mean", "6"], - {"synthetic_input_tokens_mean": 6}, - ), - ( - ["--synthetic-input-tokens-stddev", "7"], - {"synthetic_input_tokens_stddev": 7}, - ), - ( - ["--image-width-mean", "123"], - {"image_width_mean": 123}, - ), - ( - ["--image-width-stddev", "123"], - {"image_width_stddev": 123}, - ), - ( - ["--image-height-mean", "456"], - {"image_height_mean": 456}, - ), - ( - ["--image-height-stddev", "456"], - {"image_height_stddev": 456}, - ), - (["--image-format", "png"], {"image_format": ImageFormat.PNG}), - (["-v"], {"verbose": True}), - (["--verbose"], {"verbose": True}), - (["-u", "test_url"], {"u": "test_url"}), - (["--url", "test_url"], {"u": "test_url"}), - ], - ) - def test_non_file_flags_parsed(self, monkeypatch, arg, expected_attributes, capsys): - logging.init_logging() - combined_args = ["genai-perf", "profile", "--model", "test_model"] + arg - monkeypatch.setattr("sys.argv", combined_args) - args, _ = parser.parse_args() - - # Check that the attributes are set correctly - for key, value in expected_attributes.items(): - assert getattr(args, key) == value - - # Check that nothing was printed as a byproduct of parsing the arguments - captured = capsys.readouterr() - assert captured.out == "" - - @pytest.mark.parametrize( - "models, expected_model_list, formatted_name", - [ - ( - ["--model", "test_model_A"], - {"model": ["test_model_A"]}, - {"formatted_model_name": "test_model_A"}, - ), - ( - ["--model", "test_model_A", "test_model_B"], - {"model": ["test_model_A", "test_model_B"]}, - {"formatted_model_name": "test_model_A_multi"}, - ), - ( - ["--model", "test_model_A", "test_model_B", "test_model_C"], - {"model": ["test_model_A", "test_model_B", "test_model_C"]}, - {"formatted_model_name": "test_model_A_multi"}, - ), - ( - ["--model", "test_model_A:math", "test_model_B:embedding"], - {"model": ["test_model_A:math", "test_model_B:embedding"]}, - {"formatted_model_name": "test_model_A:math_multi"}, - ), - ], - ) - def test_multiple_model_args( - self, monkeypatch, models, expected_model_list, formatted_name, capsys - ): - logging.init_logging() - 
combined_args = ["genai-perf", "profile"] + models - monkeypatch.setattr("sys.argv", combined_args) - args, _ = parser.parse_args() - - # Check that models are handled correctly - for key, value in expected_model_list.items(): - assert getattr(args, key) == value - - # Check that the formatted_model_name is correctly generated - for key, value in formatted_name.items(): - assert getattr(args, key) == value - - # Check that nothing was printed as a byproduct of parsing the arguments - captured = capsys.readouterr() - assert captured.out == "" - - def test_file_flags_parsed(self, monkeypatch, mocker): - _ = mocker.patch("os.path.isfile", return_value=True) - combined_args = [ - "genai-perf", - "profile", - "--model", - "test_model", - "--input-file", - "fakefile.txt", - ] - monkeypatch.setattr("sys.argv", combined_args) - args, _ = parser.parse_args() - filepath, pathtype = args.input_file - assert filepath == Path( - "fakefile.txt" - ), "The file argument should be the path to the file" - assert pathtype == PathType.FILE - - @pytest.mark.parametrize( - "arg, expected_path", - [ - ( - ["--service-kind", "openai", "--endpoint-type", "chat"], - "artifacts/test_model-openai-chat-concurrency1", - ), - ( - ["--service-kind", "openai", "--endpoint-type", "completions"], - "artifacts/test_model-openai-completions-concurrency1", - ), - ( - ["--service-kind", "openai", "--endpoint-type", "rankings"], - "artifacts/test_model-openai-rankings-concurrency1", - ), - ( - ["--service-kind", "triton", "--backend", "tensorrtllm"], - "artifacts/test_model-triton-tensorrtllm-concurrency1", - ), - ( - ["--service-kind", "triton", "--backend", "vllm"], - "artifacts/test_model-triton-vllm-concurrency1", - ), - ( - [ - "--service-kind", - "triton", - "--backend", - "vllm", - "--concurrency", - "32", - ], - "artifacts/test_model-triton-vllm-concurrency32", - ), - ], - ) - def test_default_profile_export_filepath( - self, monkeypatch, arg, expected_path, capsys - ): - logging.init_logging() - combined_args = ["genai-perf", "profile", "--model", "test_model"] + arg - monkeypatch.setattr("sys.argv", combined_args) - args, _ = parser.parse_args() - - assert args.artifact_dir == Path(expected_path) - captured = capsys.readouterr() - assert captured.out == "" - - @pytest.mark.parametrize( - "arg, expected_path, expected_output", - [ - ( - ["--model", "strange/test_model"], - "artifacts/strange_test_model-triton-tensorrtllm-concurrency1", - ( - "Model name 'strange/test_model' cannot be used to create " - "artifact directory. Instead, 'strange_test_model' will be used" - ), - ), - ( - [ - "--model", - "hello/world/test_model", - "--service-kind", - "openai", - "--endpoint-type", - "chat", - ], - "artifacts/hello_world_test_model-openai-chat-concurrency1", - ( - "Model name 'hello/world/test_model' cannot be used to create " - "artifact directory. 
Instead, 'hello_world_test_model' will be used" - ), - ), - ], - ) - def test_model_name_artifact_path( - self, monkeypatch, arg, expected_path, expected_output, capsys - ): - logging.init_logging() - combined_args = ["genai-perf", "profile"] + arg - monkeypatch.setattr("sys.argv", combined_args) - args, _ = parser.parse_args() - - assert args.artifact_dir == Path(expected_path) - captured = capsys.readouterr() - assert expected_output in captured.out - - def test_default_load_level(self, monkeypatch, capsys): - logging.init_logging() - monkeypatch.setattr( - "sys.argv", ["genai-perf", "profile", "--model", "test_model"] - ) - args, _ = parser.parse_args() - assert args.concurrency == 1 - captured = capsys.readouterr() - assert captured.out == "" - - def test_load_level_mutually_exclusive(self, monkeypatch, capsys): - monkeypatch.setattr( - "sys.argv", - ["genai-perf", "profile", "--concurrency", "3", "--request-rate", "9.0"], - ) - expected_output = ( - "argument --request-rate: not allowed with argument --concurrency" - ) - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - assert excinfo.value.code != 0 - captured = capsys.readouterr() - assert expected_output in captured.err - - def test_model_not_provided(self, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", ["genai-perf", "profile"]) - expected_output = "The -m/--model option is required and cannot be empty." - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - assert excinfo.value.code != 0 - captured = capsys.readouterr() - assert expected_output in captured.err - - def test_pass_through_args(self, monkeypatch): - args = ["genai-perf", "profile", "-m", "test_model"] - other_args = ["--", "With", "great", "power"] - monkeypatch.setattr("sys.argv", args + other_args) - _, pass_through_args = parser.parse_args() - - assert pass_through_args == other_args[1:] - - def test_unrecognized_arg(self, monkeypatch, capsys): - monkeypatch.setattr( - "sys.argv", - [ - "genai-perf", - "profile", - "-m", - "nonexistent_model", - "--wrong-arg", - ], - ) - expected_output = "unrecognized arguments: --wrong-arg" - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - assert excinfo.value.code != 0 - captured = capsys.readouterr() - assert expected_output in captured.err - - @pytest.mark.parametrize( - "args, expected_output", - [ - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - ], - "The --endpoint-type option is required when using the 'openai' service-kind.", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - "--endpoint", - "custom/address", - ], - "The --endpoint-type option is required when using the 'openai' service-kind.", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--output-tokens-stddev", - "5", - ], - "The --output-tokens-mean option is required when using --output-tokens-stddev.", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--output-tokens-mean-deterministic", - ], - "The --output-tokens-mean option is required when using --output-tokens-mean-deterministic.", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--output-tokens-mean-deterministic", - ], - "The --output-tokens-mean option is required when using --output-tokens-mean-deterministic.", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - "--endpoint-type", - "chat", - "--output-tokens-mean", - "100", - 
"--output-tokens-mean-deterministic", - ], - "The --output-tokens-mean-deterministic option is only supported with the Triton service-kind", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--batch-size", - "10", - ], - "The --batch-size option is currently only supported with the embeddings and rankings endpoint types", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - "--endpoint-type", - "embeddings", - "--streaming", - ], - "The --streaming option is not supported with the embeddings endpoint type", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - "--endpoint-type", - "rankings", - "--streaming", - ], - "The --streaming option is not supported with the rankings endpoint type", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - "--endpoint-type", - "embeddings", - "--generate-plots", - ], - "The --generate-plots option is not currently supported with the embeddings endpoint type", - ), - ( - [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - "--endpoint-type", - "rankings", - "--generate-plots", - ], - "The --generate-plots option is not currently supported with the rankings endpoint type", - ), - ], - ) - def test_conditional_errors(self, args, expected_output, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", args) - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - assert excinfo.value.code != 0 - captured = capsys.readouterr() - assert expected_output in captured.err - - @pytest.mark.parametrize( - "args, expected_format", - [ - ( - ["--service-kind", "openai", "--endpoint-type", "chat"], - OutputFormat.OPENAI_CHAT_COMPLETIONS, - ), - ( - ["--service-kind", "openai", "--endpoint-type", "completions"], - OutputFormat.OPENAI_COMPLETIONS, - ), - ( - [ - "--service-kind", - "openai", - "--endpoint-type", - "completions", - "--endpoint", - "custom/address", - ], - OutputFormat.OPENAI_COMPLETIONS, - ), - ( - ["--service-kind", "openai", "--endpoint-type", "rankings"], - OutputFormat.RANKINGS, - ), - ( - ["--service-kind", "triton", "--backend", "tensorrtllm"], - OutputFormat.TENSORRTLLM, - ), - (["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM), - ], - ) - def test_inferred_output_format(self, monkeypatch, args, expected_format): - monkeypatch.setattr( - "sys.argv", ["genai-perf", "profile", "-m", "test_model"] + args - ) - - parsed_args, _ = parser.parse_args() - assert parsed_args.output_format == expected_format - - @pytest.mark.parametrize( - "args, expected_error", - [ - ( - ["--extra-inputs", "hi:"], - "Input name or value is empty in --extra-inputs: hi:\nExpected input format: 'input_name:value'", - ), - ( - ["--extra-inputs", ":a"], - "Input name or value is empty in --extra-inputs: :a\nExpected input format: 'input_name:value'", - ), - ( - ["--extra-inputs", ":a:"], - "Invalid input format for --extra-inputs: :a:\nExpected input format: 'input_name:value'", - ), - ( - ["--extra-inputs", "unknown"], - "Invalid input format for --extra-inputs: unknown\nExpected input format: 'input_name:value'", - ), - ( - ["--extra-inputs", "test_key:5", "--extra-inputs", "test_key:6"], - "Input name already exists in request_inputs dictionary: test_key", - ), - ], - ) - def test_repeated_extra_arg_warning(self, monkeypatch, args, expected_error): - combined_args = ["genai-perf", "profile", "-m", "test_model"] + args - 
monkeypatch.setattr("sys.argv", combined_args) - - parsed_args, _ = parser.parse_args() - - with pytest.raises(ValueError) as exc_info: - _ = parser.get_extra_inputs_as_dict(parsed_args) - - assert str(exc_info.value) == expected_error - - @pytest.mark.parametrize( - "args, expected_prompt_source", - [ - ([], PromptSource.SYNTHETIC), - (["--input-dataset", "openorca"], PromptSource.DATASET), - (["--input-file", "prompt.txt"], PromptSource.FILE), - ( - ["--input-file", "prompt.txt", "--synthetic-input-tokens-mean", "10"], - PromptSource.FILE, - ), - ], - ) - def test_inferred_prompt_source( - self, monkeypatch, mocker, args, expected_prompt_source - ): - _ = mocker.patch("builtins.open", mocker.mock_open(read_data="data")) - _ = mocker.patch("os.path.isfile", return_value=True) - _ = mocker.patch("os.path.isdir", return_value=True) - combined_args = ["genai-perf", "profile", "--model", "test_model"] + args - monkeypatch.setattr("sys.argv", combined_args) - args, _ = parser.parse_args() - - assert args.prompt_source == expected_prompt_source - - def test_prompt_source_assertions(self, monkeypatch, mocker, capsys): - _ = mocker.patch("builtins.open", mocker.mock_open(read_data="data")) - _ = mocker.patch("os.path.isfile", return_value=True) - _ = mocker.patch("os.path.isdir", return_value=True) - args = [ - "genai-perf", - "profile", - "--model", - "test_model", - "--input-dataset", - "openorca", - "--input-file", - "prompt.txt", - ] - monkeypatch.setattr("sys.argv", args) - - expected_output = ( - "argument --input-file: not allowed with argument --input-dataset" - ) - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - assert excinfo.value.code != 0 - captured = capsys.readouterr() - assert expected_output in captured.err - - @pytest.mark.parametrize( - "args", - [ - # negative numbers - ["--image-width-mean", "-123"], - ["--image-width-stddev", "-34"], - ["--image-height-mean", "-123"], - ["--image-height-stddev", "-34"], - # zeros - ["--image-width-mean", "0"], - ["--image-height-mean", "0"], - ], - ) - def test_positive_image_input_args(self, monkeypatch, args): - combined_args = ["genai-perf", "profile", "-m", "test_model"] + args - monkeypatch.setattr("sys.argv", combined_args) - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - # ================================================ - # COMPARE SUBCOMMAND - # ================================================ - expected_compare_help_output = ( - "Subcommand to generate plots that compare multiple profile runs." 
- ) - - @pytest.mark.parametrize( - "args, expected_output", - [ - (["-h"], expected_compare_help_output), - (["--help"], expected_compare_help_output), - ], - ) - def test_compare_help_arguments_output_and_exit( - self, monkeypatch, args, expected_output, capsys - ): - logging.init_logging() - monkeypatch.setattr("sys.argv", ["genai-perf", "compare"] + args) - - with pytest.raises(SystemExit) as excinfo: - _ = parser.parse_args() - - # Check that the exit was successful - assert excinfo.value.code == 0 - - # Capture that the correct message was displayed - captured = capsys.readouterr() - assert expected_output in captured.out - - def test_compare_mutually_exclusive(self, monkeypatch, capsys): - args = ["genai-perf", "compare", "--config", "hello", "--files", "a", "b", "c"] - monkeypatch.setattr("sys.argv", args) - expected_output = "argument -f/--files: not allowed with argument --config" - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - assert excinfo.value.code != 0 - captured = capsys.readouterr() - assert expected_output in captured.err - - def test_compare_not_provided(self, monkeypatch, capsys): - args = ["genai-perf", "compare"] - monkeypatch.setattr("sys.argv", args) - expected_output = "Either the --config or --files option must be specified." - - with pytest.raises(SystemExit) as excinfo: - parser.parse_args() - - assert excinfo.value.code != 0 - captured = capsys.readouterr() - assert expected_output in captured.err - - @pytest.mark.parametrize( - "extra_inputs_list, expected_dict", - [ - (["test_key:test_value"], {"test_key": "test_value"}), - ( - ["test_key:1", "another_test_key:2"], - {"test_key": 1, "another_test_key": 2}, - ), - ( - [ - '{"name": "Wolverine","hobbies": ["hacking", "slashing"],"address": {"street": "1407 Graymalkin Lane, Salem Center","city": "NY"}}' - ], - { - "name": "Wolverine", - "hobbies": ["hacking", "slashing"], - "address": { - "street": "1407 Graymalkin Lane, Salem Center", - "city": "NY", - }, - }, - ), - ], - ) - def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict): - namespace = argparse.Namespace() - namespace.extra_inputs = extra_inputs_list - actual_dict = parser.get_extra_inputs_as_dict(namespace) - assert actual_dict == expected_dict diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_console_exporter.py b/src/c++/perf_analyzer/genai-perf/tests/test_console_exporter.py deleted file mode 100644 index dda62e04a..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_console_exporter.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from genai_perf import parser -from genai_perf.export_data.console_exporter import ConsoleExporter -from genai_perf.export_data.exporter_config import ExporterConfig -from genai_perf.metrics import LLMMetrics, Metrics, Statistics - - -class TestConsoleExporter: - - def test_streaming_llm_output(self, monkeypatch, capsys) -> None: - argv = [ - "genai-perf", - "profile", - "-m", - "model_name", - "--service-kind", - "openai", - "--endpoint-type", - "chat", - "--streaming", - ] - monkeypatch.setattr("sys.argv", argv) - args, _ = parser.parse_args() - - metrics = LLMMetrics( - request_throughputs=[123], - request_latencies=[4, 5, 6], - time_to_first_tokens=[7, 8, 9], - inter_token_latencies=[10, 11, 12], - output_token_throughputs=[456], - output_sequence_lengths=[1, 2, 3], - input_sequence_lengths=[5, 6, 7], - ) - stats = Statistics(metrics=metrics) - - config = ExporterConfig() - config.stats = stats.stats_dict - config.metrics = stats.metrics - config.args = args - - exporter = ConsoleExporter(config) - exporter.export() - - expected_content = ( - " LLM Metrics \n" - "┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓\n" - "┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃\n" - "┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━┩\n" - "│ Time to first token (ms) │ 8.00 │ 7.00 │ 9.00 │ 8.98 │ 8.80 │ 8.50 │\n" - "│ Inter token latency (ms) │ 11.00 │ 10.00 │ 12.00 │ 11.98 │ 11.80 │ 11.50 │\n" - "│ Request latency (ms) │ 5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" - "│ Output sequence length │ 2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n" - "│ Input sequence length │ 6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n" - "└──────────────────────────┴───────┴───────┴───────┴───────┴───────┴───────┘\n" - "Output token throughput (per sec): 456.00\n" - "Request throughput (per sec): 123.00\n" - ) - - returned_data = capsys.readouterr().out - assert returned_data == expected_content - - def test_nonstreaming_llm_output(self, monkeypatch, capsys) -> None: - argv = [ - "genai-perf", - "profile", - "-m", - "model_name", - "--service-kind", - "openai", - "--endpoint-type", - "chat", - ] - monkeypatch.setattr("sys.argv", argv) - args, _ = parser.parse_args() - - metrics = LLMMetrics( - request_throughputs=[123], - request_latencies=[4, 5, 6], - time_to_first_tokens=[4, 5, 6], # same as request_latency - inter_token_latencies=[], # no ITL - output_token_throughputs=[456], - output_sequence_lengths=[1, 2, 3], - input_sequence_lengths=[5, 6, 7], - ) - stats = Statistics(metrics=metrics) - - config = ExporterConfig() - config.stats = stats.stats_dict - config.metrics = stats.metrics - config.args = args - - exporter = ConsoleExporter(config) - exporter.export() - - # No TTFT and ITL in the output - 
expected_content = ( - " LLM Metrics \n" - "┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┓\n" - "┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃\n" - "┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━┩\n" - "│ Request latency (ms) │ 5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" - "│ Output sequence length │ 2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n" - "│ Input sequence length │ 6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n" - "└────────────────────────┴──────┴──────┴──────┴──────┴──────┴──────┘\n" - "Output token throughput (per sec): 456.00\n" - "Request throughput (per sec): 123.00\n" - ) - - returned_data = capsys.readouterr().out - assert returned_data == expected_content - - def test_embedding_output(self, monkeypatch, capsys) -> None: - argv = [ - "genai-perf", - "profile", - "-m", - "model_name", - "--service-kind", - "openai", - "--endpoint-type", - "embeddings", - ] - monkeypatch.setattr("sys.argv", argv) - args, _ = parser.parse_args() - - metrics = Metrics( - request_throughputs=[123], - request_latencies=[4, 5, 6], - ) - stats = Statistics(metrics=metrics) - - config = ExporterConfig() - config.stats = stats.stats_dict - config.metrics = stats.metrics - config.args = args - - exporter = ConsoleExporter(config) - exporter.export() - - expected_content = ( - " Embeddings Metrics \n" - "┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┓\n" - "┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃\n" - "┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━┩\n" - "│ Request latency (ms) │ 5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" - "└──────────────────────┴──────┴──────┴──────┴──────┴──────┴──────┘\n" - "Request throughput (per sec): 123.00\n" - ) - - returned_data = capsys.readouterr().out - assert returned_data == expected_content diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_csv_exporter.py b/src/c++/perf_analyzer/genai-perf/tests/test_csv_exporter.py deleted file mode 100644 index 6a60bc2dc..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_csv_exporter.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from io import StringIO -from pathlib import Path -from typing import Any, List - -import pytest -from genai_perf import parser -from genai_perf.export_data.csv_exporter import CsvExporter -from genai_perf.export_data.exporter_config import ExporterConfig -from genai_perf.metrics import LLMMetrics, Metrics, Statistics - - -class TestCsvExporter: - @pytest.fixture - def mock_read_write(self, monkeypatch: pytest.MonkeyPatch) -> List[str]: - """ - This function will mock the open function for specific files. - """ - - written_data = [] - - original_open = open - - def custom_open(filename, *args, **kwargs): - def write(self: Any, content: str) -> int: - written_data.append(content) - return len(content) - - if str(filename) == "profile_export_genai_perf.csv": - tmp_file = StringIO() - tmp_file.write = write.__get__(tmp_file) - return tmp_file - else: - return original_open(filename, *args, **kwargs) - - monkeypatch.setattr("builtins.open", custom_open) - - return written_data - - def test_streaming_llm_csv_output( - self, monkeypatch, mock_read_write: pytest.MonkeyPatch - ) -> None: - """ - Collect LLM metrics from profile export data and confirm correct values are - printed in csv. - """ - argv = [ - "genai-perf", - "profile", - "-m", - "model_name", - "--service-kind", - "openai", - "--endpoint-type", - "chat", - "--streaming", - ] - monkeypatch.setattr("sys.argv", argv) - args, _ = parser.parse_args() - - metrics = LLMMetrics( - request_throughputs=[123], - request_latencies=[4, 5, 6], - time_to_first_tokens=[7, 8, 9], - inter_token_latencies=[10, 11, 12], - output_token_throughputs=[456], - output_sequence_lengths=[1, 2, 3], - input_sequence_lengths=[5, 6, 7], - ) - stats = Statistics(metrics=metrics) - - config = ExporterConfig() - config.stats = stats.stats_dict - config.metrics = stats.metrics - config.artifact_dir = Path(".") - config.args = args - - exporter = CsvExporter(config) - exporter.export() - - expected_content = [ - "Metric,avg,min,max,p99,p95,p90,p75,p50,p25\r\n", - "Time To First Token (ms),8.00,7.00,9.00,8.98,8.90,8.80,8.50,8.00,7.50\r\n", - "Inter Token Latency (ms),11.00,10.00,12.00,11.98,11.90,11.80,11.50,11.00,10.50\r\n", - "Request Latency (ms),5.00,4.00,6.00,5.98,5.90,5.80,5.50,5.00,4.50\r\n", - "Output Sequence Length,2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n", - "Input Sequence Length,6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n", - "\r\n", - "Metric,Value\r\n", - "Output Token Throughput (per sec),456.00\r\n", - "Request Throughput (per sec),123.00\r\n", - ] - returned_data = mock_read_write - assert returned_data == expected_content - - def test_nonstreaming_llm_csv_output( - self, monkeypatch, mock_read_write: pytest.MonkeyPatch - ) -> None: - """ - Collect LLM metrics from profile export data and confirm correct values are - printed in csv. 
- """ - argv = [ - "genai-perf", - "profile", - "-m", - "model_name", - "--service-kind", - "openai", - "--endpoint-type", - "chat", - ] - monkeypatch.setattr("sys.argv", argv) - args, _ = parser.parse_args() - - metrics = LLMMetrics( - request_throughputs=[123], - request_latencies=[4, 5, 6], - time_to_first_tokens=[4, 5, 6], # same as request_latency - inter_token_latencies=[], # no ITL - output_token_throughputs=[456], - output_sequence_lengths=[1, 2, 3], - input_sequence_lengths=[5, 6, 7], - ) - stats = Statistics(metrics=metrics) - - config = ExporterConfig() - config.stats = stats.stats_dict - config.metrics = stats.metrics - config.artifact_dir = Path(".") - config.args = args - - exporter = CsvExporter(config) - exporter.export() - - expected_content = [ - "Metric,avg,min,max,p99,p95,p90,p75,p50,p25\r\n", - "Request Latency (ms),5.00,4.00,6.00,5.98,5.90,5.80,5.50,5.00,4.50\r\n", - "Output Sequence Length,2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n", - "Input Sequence Length,6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n", - "\r\n", - "Metric,Value\r\n", - "Output Token Throughput (per sec),456.00\r\n", - "Request Throughput (per sec),123.00\r\n", - ] - returned_data = mock_read_write - assert returned_data == expected_content - - def test_embedding_csv_output( - self, monkeypatch, mock_read_write: pytest.MonkeyPatch - ) -> None: - argv = [ - "genai-perf", - "profile", - "-m", - "model_name", - "--service-kind", - "openai", - "--endpoint-type", - "embeddings", - ] - monkeypatch.setattr("sys.argv", argv) - args, _ = parser.parse_args() - - metrics = Metrics( - request_throughputs=[123], - request_latencies=[4, 5, 6], - ) - stats = Statistics(metrics=metrics) - - config = ExporterConfig() - config.stats = stats.stats_dict - config.metrics = stats.metrics - config.artifact_dir = Path(".") - config.args = args - - exporter = CsvExporter(config) - exporter.export() - - expected_content = [ - "Metric,avg,min,max,p99,p95,p90,p75,p50,p25\r\n", - "Request Latency (ms),5.00,4.00,6.00,5.98,5.90,5.80,5.50,5.00,4.50\r\n", - "\r\n", - "Metric,Value\r\n", - "Request Throughput (per sec),123.00\r\n", - ] - returned_data = mock_read_write - assert returned_data == expected_content diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_data_exporter_factory.py b/src/c++/perf_analyzer/genai-perf/tests/test_data_exporter_factory.py deleted file mode 100644 index 1a1628ac7..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_data_exporter_factory.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -from argparse import Namespace - -import genai_perf.export_data.data_exporter_factory as factory -from genai_perf.export_data.console_exporter import ConsoleExporter -from genai_perf.export_data.csv_exporter import CsvExporter -from genai_perf.export_data.exporter_config import ExporterConfig -from genai_perf.export_data.json_exporter import JsonExporter -from genai_perf.parser import get_extra_inputs_as_dict - - -class TestOutputReporter: - stats = { - "request_latency": { - "unit": "ms", - "avg": 1, - "p99": 2, - "p95": 3, - "p90": 4, - "p75": 5, - "p50": 6, - "p25": 7, - "max": 8, - "min": 9, - "std": 0, - }, - } - args = { - "model": ["gpt2_vllm"], - "formatted_model_name": "gpt2_vllm", - "model_selection_strategy": "round_robin", - "func": "Should_be_removed", - "output_format": "Should_be_removed", - "profile_export_file": ".", - "artifact_dir": ".", - "extra_inputs": ["max_tokens:200"], - } - args_namespace = Namespace(**args) - - config = ExporterConfig() - config.stats = stats - config.args = args_namespace - config.artifact_dir = args_namespace.artifact_dir - config.extra_inputs = get_extra_inputs_as_dict(args_namespace) - f = factory.DataExporterFactory() - - def test_return_json_exporter(self) -> None: - exporter_list = self.f.create_data_exporters(self.config) - assert any(isinstance(exporter, JsonExporter) for exporter in exporter_list) - - def test_return_csv_exporter(self) -> None: - exporter_list = self.f.create_data_exporters(self.config) - assert any(isinstance(exporter, CsvExporter) for exporter in exporter_list) - - def test_return_console_exporter(self) -> None: - exporter_list = self.f.create_data_exporters(self.config) - assert any(isinstance(exporter, ConsoleExporter) for exporter in exporter_list) diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py b/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py deleted file mode 100644 index f82e59312..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py +++ /dev/null @@ -1,274 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json - -import genai_perf.parser as parser -from genai_perf.export_data.exporter_config import ExporterConfig -from genai_perf.export_data.json_exporter import JsonExporter - - -class TestJsonExporter: - def test_generate_json(self, monkeypatch) -> None: - cli_cmd = [ - "genai-perf", - "profile", - "-m", - "gpt2_vllm", - "--backend", - "vllm", - "--streaming", - "--extra-inputs", - "max_tokens:256", - "--extra-inputs", - "ignore_eos:true", - ] - monkeypatch.setattr("sys.argv", cli_cmd) - args, _ = parser.parse_args() - config = ExporterConfig() - config.stats = self.stats - config.args = args - config.extra_inputs = parser.get_extra_inputs_as_dict(args) - config.artifact_dir = args.artifact_dir - json_exporter = JsonExporter(config) - assert json_exporter._stats_and_args == json.loads(self.expected_json_output) - - stats = { - "request_throughput": {"unit": "requests/sec", "avg": "7"}, - "request_latency": { - "unit": "ms", - "avg": 1, - "p99": 2, - "p95": 3, - "p90": 4, - "p75": 5, - "p50": 6, - "p25": 7, - "max": 8, - "min": 9, - "std": 0, - }, - "time_to_first_token": { - "unit": "ms", - "avg": 11, - "p99": 12, - "p95": 13, - "p90": 14, - "p75": 15, - "p50": 16, - "p25": 17, - "max": 18, - "min": 19, - "std": 10, - }, - "inter_token_latency": { - "unit": "ms", - "avg": 21, - "p99": 22, - "p95": 23, - "p90": 24, - "p75": 25, - "p50": 26, - "p25": 27, - "max": 28, - "min": 29, - "std": 20, - }, - "output_token_throughput": { - "unit": "tokens/sec", - "avg": 31, - }, - "output_token_throughput_per_request": { - "unit": "tokens/sec", - "avg": 41, - "p99": 42, - "p95": 43, - "p90": 44, - "p75": 45, - "p50": 46, - "p25": 47, - "max": 48, - "min": 49, - "std": 40, - }, - "output_sequence_length": { - "unit": "tokens", - "avg": 51, - "p99": 52, - "p95": 53, - "p90": 54, - "p75": 55, - "p50": 56, - "p25": 57, - "max": 58, - "min": 59, - "std": 50, - }, - "input_sequence_length": { - "unit": "tokens", - "avg": 61, - "p99": 62, - "p95": 63, - "p90": 64, - "p75": 65, - "p50": 66, - "p25": 67, - "max": 68, - "min": 69, - "std": 60, - }, - } - - expected_json_output = """ - { - "request_throughput": { - "unit": "requests/sec", - "avg": "7" - }, - "request_latency": { - "unit": "ms", - "avg": 1, - "p99": 2, - "p95": 3, - "p90": 4, - "p75": 5, - "p50": 6, - "p25": 7, - "max": 8, - "min": 9, - "std": 0 - }, - "time_to_first_token": { - "unit": "ms", - "avg": 11, - "p99": 12, - "p95": 13, - "p90": 14, - "p75": 15, - "p50": 16, - "p25": 17, - "max": 18, - "min": 19, - "std": 10 - }, - "inter_token_latency": { - "unit": "ms", - "avg": 21, - "p99": 22, - "p95": 23, - "p90": 24, - "p75": 25, - "p50": 26, - "p25": 27, - "max": 28, - "min": 29, - "std": 20 - }, - "output_token_throughput": { - "unit": "tokens/sec", - "avg": 31 - }, - 
"output_token_throughput_per_request": { - "unit": "tokens/sec", - "avg": 41, - "p99": 42, - "p95": 43, - "p90": 44, - "p75": 45, - "p50": 46, - "p25": 47, - "max": 48, - "min": 49, - "std": 40 - }, - "output_sequence_length": { - "unit": "tokens", - "avg": 51, - "p99": 52, - "p95": 53, - "p90": 54, - "p75": 55, - "p50": 56, - "p25": 57, - "max": 58, - "min": 59, - "std": 50 - }, - "input_sequence_length": { - "unit": "tokens", - "avg": 61, - "p99": 62, - "p95": 63, - "p90": 64, - "p75": 65, - "p50": 66, - "p25": 67, - "max": 68, - "min": 69, - "std": 60 - }, - "input_config": { - "model": ["gpt2_vllm"], - "formatted_model_name": "gpt2_vllm", - "model_selection_strategy": "round_robin", - "backend": "vllm", - "batch_size": 1, - "endpoint": null, - "endpoint_type": null, - "service_kind": "triton", - "streaming": true, - "u": null, - "input_dataset": null, - "num_prompts": 100, - "output_tokens_mean": -1, - "output_tokens_mean_deterministic": false, - "output_tokens_stddev": 0, - "random_seed": 0, - "synthetic_input_tokens_mean": 550, - "synthetic_input_tokens_stddev": 0, - "image_width_mean": 100, - "image_width_stddev": 0, - "image_height_mean": 100, - "image_height_stddev": 0, - "image_format": null, - "concurrency": 1, - "measurement_interval": 10000, - "request_rate": null, - "stability_percentage": 999, - "generate_plots": false, - "profile_export_file": "artifacts/gpt2_vllm-triton-vllm-concurrency1/profile_export.json", - "artifact_dir": "artifacts/gpt2_vllm-triton-vllm-concurrency1", - "tokenizer": "hf-internal-testing/llama-tokenizer", - "verbose": false, - "subcommand": "profile", - "prompt_source": "synthetic", - "extra_inputs": { - "max_tokens": 256, - "ignore_eos": true - } - } - } - """ diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_library.py b/src/c++/perf_analyzer/genai-perf/tests/test_library.py deleted file mode 100644 index 09cd13d45..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_library.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import genai_perf - - -# Placeholder to add real tests in the future -def test_version(): - print(genai_perf.__version__) diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py deleted file mode 100644 index 028e72849..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py +++ /dev/null @@ -1,882 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os -import random -import statistics -from collections import namedtuple -from pathlib import Path -from unittest.mock import mock_open, patch - -import pytest -import responses -from genai_perf import tokenizer -from genai_perf.constants import CNN_DAILY_MAIL, DEFAULT_INPUT_DATA_JSON, OPEN_ORCA -from genai_perf.exceptions import GenAIPerfException -from genai_perf.llm_inputs.llm_inputs import ( - LlmInputs, - ModelSelectionStrategy, - OutputFormat, - PromptSource, -) -from genai_perf.llm_inputs.synthetic_image_generator import ImageFormat -from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer -from PIL import Image - -mocked_openorca_data = { - "features": [ - {"feature_idx": 0, "name": "id", "type": {"dtype": "string", "_type": "Value"}}, - { - "feature_idx": 1, - "name": "system_prompt", - "type": {"dtype": "string", "_type": "Value"}, - }, - { - "feature_idx": 2, - "name": "question", - "type": {"dtype": "string", "_type": "Value"}, - }, - { - "feature_idx": 3, - "name": "response", - "type": {"dtype": "string", "_type": "Value"}, - }, - ], - "rows": [ - { - "row_idx": 0, - "row": { - "id": "niv.242684", - "system_prompt": "", - "question": "You will be given a definition of a task first, then some input of the task.\\nThis task is about using the specified sentence and converting the sentence to Resource Description Framework (RDF) triplets of the form (subject, predicate object). The RDF triplets generated must be such that the triplets accurately capture the structure and semantics of the input sentence. The input is a sentence and the output is a list of triplets of the form [subject, predicate, object] that capture the relationships present in the sentence. 
When a sentence has more than 1 RDF triplet possible, the output must contain all of them.\\n\\nAFC Ajax (amateurs)'s ground is Sportpark De Toekomst where Ajax Youth Academy also play.\\nOutput:", - "response": '[\\n ["AFC Ajax (amateurs)", "has ground", "Sportpark De Toekomst"],\\n ["Ajax Youth Academy", "plays at", "Sportpark De Toekomst"]\\n]', - }, - "truncated_cells": [], - } - ], - "num_rows_total": 2914896, - "num_rows_per_page": 100, - "partial": True, -} - -TEST_LENGTH = 1 - - -class TestLlmInputs: - # Define service kind, backend or api, and output format combinations - SERVICE_KIND_BACKEND_ENDPOINT_TYPE_FORMATS = [ - ("triton", "vllm", OutputFormat.VLLM), - ("triton", "tensorrtllm", OutputFormat.TENSORRTLLM), - ("openai", "v1/completions", OutputFormat.OPENAI_COMPLETIONS), - ("openai", "v1/chat/completions", OutputFormat.OPENAI_CHAT_COMPLETIONS), - ("openai", "v1/chat/completions", OutputFormat.OPENAI_VISION), - ] - - @pytest.fixture - def default_configured_url(self): - default_configured_url = LlmInputs._create_configured_url( - LlmInputs.OPEN_ORCA_URL, - LlmInputs.DEFAULT_STARTING_INDEX, - LlmInputs.DEFAULT_LENGTH, - ) - - yield default_configured_url - - # TODO (TMA-1754): Add tests that verify json schemas - @pytest.fixture(scope="class") - def default_tokenizer(self): - yield tokenizer.get_tokenizer(tokenizer.DEFAULT_TOKENIZER) - - def test_input_type_url_no_dataset_name(self): - """ - Test for exception when input type is URL and no dataset name - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_dataset_name_if_input_type_is_url( - input_type=PromptSource.DATASET, dataset_name="" - ) - - def test_input_type_synthetic_no_tokenizer(self): - """ - Test for exception when input type is SYNTHETIC and no tokenizer - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_tokenzier_if_input_type_is_synthetic( - input_type=PromptSource.SYNTHETIC, tokenizer=None # type: ignore - ) - - def test_illegal_starting_index(self): - """ - Test for exceptions when illegal values are given for starting index - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_starting_index(starting_index="foo") # type: ignore - - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_starting_index(starting_index=-1) - - def test_illegal_length(self): - """ - Test for exceptions when illegal values are given for length - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_length(length="foo") # type: ignore - - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_length(length=0) - - def test_create_configured_url(self): - """ - Test that we are appending and configuring the URL correctly - """ - expected_configured_url = ( - "http://test-url.com" - + f"&offset={LlmInputs.DEFAULT_STARTING_INDEX}" - + f"&length={LlmInputs.DEFAULT_LENGTH}" - ) - configured_url = LlmInputs._create_configured_url( - "http://test-url.com", - LlmInputs.DEFAULT_STARTING_INDEX, - LlmInputs.DEFAULT_LENGTH, - ) - - assert configured_url == expected_configured_url - - def test_download_dataset_illegal_url(self): - """ - Test for exception when URL is bad - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._download_dataset( - "https://bad-url.zzz", - ) - - def test_llm_inputs_error_in_server_response(self): - """ - Test for exception when length is out of range - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs.create_llm_inputs( - input_type=PromptSource.DATASET, - 
dataset_name=OPEN_ORCA, - output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - starting_index=LlmInputs.DEFAULT_STARTING_INDEX, - length=int(LlmInputs.DEFAULT_LENGTH * 100), - ) - - @responses.activate - def test_llm_inputs_with_defaults(self, default_configured_url): - """ - Test that default options work - """ - responses.add( - responses.GET, - f"{default_configured_url}", - json=mocked_openorca_data, - status=200, - ) - - dataset = LlmInputs._download_dataset( - default_configured_url, - ) - dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - dataset=dataset - ) - - assert dataset_json is not None - assert len(dataset_json["rows"]) == TEST_LENGTH - - # TODO (TPA-114) Refactor LLM inputs and testing - # def test_llm_inputs_with_non_default_length(self): - # """ - # Test that non-default length works - # """ - # configured_url = LlmInputs._create_configured_url( - # LlmInputs.OPEN_ORCA_URL, - # LlmInputs.DEFAULT_STARTING_INDEX, - # (int(LlmInputs.DEFAULT_LENGTH / 2)), - # ) - # dataset = LlmInputs._download_dataset( - # configured_url, - # ) - # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - # dataset=dataset - # ) - - # assert dataset_json is not None - # assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH / 2 - - # def test_convert_default_json_to_pa_format(self, default_configured_url): - # """ - # Test that conversion to PA JSON format is correct - # """ - # dataset = LlmInputs._download_dataset( - # default_configured_url, - # ) - # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - # dataset=dataset - # ) - # pa_json = LlmInputs._convert_generic_json_to_output_format( - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # generic_dataset=dataset_json, - # add_model_name=False, - # add_stream=False, - # extra_inputs={}, - # output_tokens_mean=LlmInputs.DEFAULT_OUTPUT_TOKENS_MEAN, - # output_tokens_stddev=LlmInputs.DEFAULT_OUTPUT_TOKENS_STDDEV, - # output_tokens_deterministic=False, - # model_name=["test_model_A"], - # ) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_create_openai_llm_inputs_cnn_dailymail(self): - # """ - # Test CNN_DAILYMAIL can be accessed - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # dataset_name=CNN_DAILY_MAIL, - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_write_to_file(self): - # """ - # Test that write to file is working correctly - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # dataset_name=OPEN_ORCA, - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # model_name="open_orca", - # add_model_name=True, - # add_stream=True, - # ) - # try: - # with open(DEFAULT_INPUT_DATA_JSON, "r") as f: - # json_str = f.read() - # finally: - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json == json.loads(json_str) - - # def test_create_openai_to_vllm(self): - # """ - # Test conversion of openai to vllm - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # output_format=OutputFormat.VLLM, - # dataset_name=OPEN_ORCA, - # add_model_name=False, - # add_stream=True, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 
LlmInputs.DEFAULT_LENGTH - - # def test_create_openai_to_completions(self): - # """ - # Test conversion of openai to completions - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # output_format=OutputFormat.OPENAI_COMPLETIONS, - # dataset_name=OPEN_ORCA, - # add_model_name=False, - # add_stream=True, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - # # NIM legacy completion endpoint only supports string and not - # # array of strings. Verify that the prompt is of type string - # # not list - # assert isinstance(pa_json["data"][0]["payload"][0]["prompt"], str) - - # def test_create_openai_to_trtllm(self): - # """ - # Test conversion of openai to trtllm - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # output_format=OutputFormat.TENSORRTLLM, - # dataset_name=OPEN_ORCA, - # add_model_name=False, - # add_stream=True, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_random_synthetic_no_stddev(self, default_tokenizer): - # """ - # Test that we can produce an exact number of random synthetic tokens - # """ - # random.seed(1) - - # def _subtest(token_length): - # synthetic_prompt = LlmInputs._create_synthetic_prompt( - # tokenizer=default_tokenizer, - # prompt_tokens_mean=token_length, - # prompt_tokens_stddev=0, - # ) - - # actual_token_length = len(default_tokenizer.encode(synthetic_prompt)) - # assert token_length == actual_token_length - - # # Test all of 500-600 to make sure exact - # for i in range(500, 600): - # _subtest(i) - - # # Test some larger values - # _subtest(1500) - # _subtest(10000) - - # def test_random_synthetic_stddev(self, default_tokenizer): - # """ - # Test that we can produce random synthetic tokens within a requested stddev - # """ - # random.seed(1) - - # def _subtest(num_samples, mean, stddev): - # prompt_tokens = [] - # for _ in range(num_samples): - # prompt = LlmInputs._create_synthetic_prompt( - # tokenizer=default_tokenizer, - # prompt_tokens_mean=mean, - # prompt_tokens_stddev=stddev, - # ) - # prompt_tokens.append(len(default_tokenizer.encode(prompt))) - - # assert statistics.mean(prompt_tokens) == pytest.approx(mean, rel=0.1) - # assert statistics.stdev(prompt_tokens) == pytest.approx(stddev, rel=0.2) - - # _subtest(50, 200, 20) - # _subtest(50, 400, 10) - # _subtest(200, 50, 10) - - # def test_random_seed(self, default_tokenizer): - # """ - # Test that when given the same seed, create_llm_inputs will return the same result, - # and that when given a different seed, it will produce a different result - # """ - - # inputs_seed5_a = LlmInputs.create_llm_inputs( - # tokenizer=default_tokenizer, - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.TENSORRTLLM, - # prompt_tokens_mean=300, - # prompt_tokens_stddev=20, - # num_of_output_prompts=5, - # random_seed=5, - # model_name=["test_model_A"], - # ) - - # inputs_seed5_b = LlmInputs.create_llm_inputs( - # tokenizer=default_tokenizer, - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.TENSORRTLLM, - # prompt_tokens_mean=300, - # prompt_tokens_stddev=20, - # num_of_output_prompts=5, - # random_seed=5, - # model_name=["test_model_A"], - # ) - - # inputs_seed10 = LlmInputs.create_llm_inputs( - # tokenizer=default_tokenizer, - # 
input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.TENSORRTLLM, - # prompt_tokens_mean=300, - # prompt_tokens_stddev=20, - # num_of_output_prompts=5, - # random_seed=10, - # model_name=["test_model_A"], - # ) - - # assert inputs_seed5_a == inputs_seed5_b - # assert inputs_seed5_a != inputs_seed10 - - # def test_synthetic_to_vllm(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to vllm - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.VLLM, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 - - # def test_synthetic_to_trtllm(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to trtllm - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.TENSORRTLLM, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 - - # def test_synthetic_to_openai_chat_completions(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to OpenAI chat completions - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 - - # def test_synthetic_to_openai_completions(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to OpenAI completions - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.OPENAI_COMPLETIONS, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # model_name=["test_model_A"], - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 - - # @pytest.mark.parametrize( - # "output_format", - # [format[2] for format in SERVICE_KIND_BACKEND_ENDPOINT_TYPE_FORMATS], - # ) - # def test_extra_inputs( - # self, default_tokenizer: Tokenizer, output_format: OutputFormat - # ) -> None: - # input_name = "max_tokens" - # input_value = 5 - # request_inputs = {input_name: input_value} - - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=output_format, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # extra_inputs=request_inputs, - # model_name=["test_model_A"], - # ) - - # assert len(pa_json["data"]) == 5 - - # if ( - # output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS - # or output_format == OutputFormat.OPENAI_COMPLETIONS - # ): - # for entry in pa_json["data"]: - # assert "payload" in entry, "Payload is missing in the request" - # payload = entry["payload"] - # for item in payload: - # assert ( - # input_name in item - # ), f"The input name {input_name} is not present in the request" - # 
assert ( - # item[input_name] == input_value - # ), f"The value of {input_name} is incorrect" - # elif ( - # output_format == OutputFormat.TENSORRTLLM - # or output_format == OutputFormat.VLLM - # ): - # for entry in pa_json["data"]: - # assert ( - # input_name in entry - # ), f"The {input_name} is not present in the request" - # assert entry[input_name] == [ - # input_value - # ], f"The value of {input_name} is incorrect" - # else: - # assert False, f"Unsupported output format: {output_format}" - - def test_add_image_inputs_openai_vision(self) -> None: - generic_json = { - "rows": [ - {"text_input": "test input one", "image": "test_image1"}, - {"text_input": "test input two", "image": "test_image2"}, - ] - } - - generic_json = LlmInputs._convert_to_openai_multi_modal_content(generic_json) - - row1 = generic_json["rows"][0]["text_input"] - assert row1 == [ - { - "type": "text", - "text": "test input one", - }, - { - "type": "image_url", - "image_url": {"url": "test_image1"}, - }, - ] - - row2 = generic_json["rows"][1]["text_input"] - assert row2 == [ - { - "type": "text", - "text": "test input two", - }, - { - "type": "image_url", - "image_url": {"url": "test_image2"}, - }, - ] - - @patch( - "genai_perf.llm_inputs.llm_inputs.LlmInputs._create_synthetic_prompt", - return_value="This is test prompt", - ) - @patch( - "genai_perf.llm_inputs.llm_inputs.LlmInputs._create_synthetic_image", - return_value="test_image_base64", - ) - @pytest.mark.parametrize( - "output_format", - [ - OutputFormat.OPENAI_CHAT_COMPLETIONS, - OutputFormat.OPENAI_COMPLETIONS, - OutputFormat.OPENAI_EMBEDDINGS, - OutputFormat.RANKINGS, - OutputFormat.OPENAI_VISION, - OutputFormat.VLLM, - OutputFormat.TENSORRTLLM, - ], - ) - def test_get_input_dataset_from_synthetic( - self, mock_prompt, mock_image, output_format - ) -> None: - _placeholder = 123 # dummy value - num_prompts = 3 - - dataset_json = LlmInputs._get_input_dataset_from_synthetic( - tokenizer=get_tokenizer(DEFAULT_TOKENIZER), - prompt_tokens_mean=_placeholder, - prompt_tokens_stddev=_placeholder, - num_of_output_prompts=num_prompts, - image_width_mean=_placeholder, - image_width_stddev=_placeholder, - image_height_mean=_placeholder, - image_height_stddev=_placeholder, - image_format=ImageFormat.PNG, - output_format=output_format, - ) - - assert len(dataset_json["rows"]) == num_prompts - - for i in range(num_prompts): - row = dataset_json["rows"][i]["row"] - - if output_format == OutputFormat.OPENAI_VISION: - assert row == { - "text_input": "This is test prompt", - "image": "test_image_base64", - } - else: - assert row == { - "text_input": "This is test prompt", - } - - # def test_trtllm_default_max_tokens(self, default_tokenizer: Tokenizer) -> None: - # input_name = "max_tokens" - # input_value = 256 - - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.TENSORRTLLM, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # model_name=["test_model_A"], - # ) - - # assert len(pa_json["data"]) == 5 - # for entry in pa_json["data"]: - # assert ( - # input_name in entry - # ), f"The {input_name} is not present in the request" - # assert entry[input_name] == [ - # input_value - # ], f"The value of {input_name} is incorrect" - - # @pytest.mark.parametrize( - # "output_format", - # [format[2] for format in SERVICE_KIND_BACKEND_ENDPOINT_TYPE_FORMATS], - # ) - # def test_output_tokens_mean(self, output_format, default_tokenizer): - # if ( - # output_format != 
OutputFormat.VLLM - # and output_format != OutputFormat.TENSORRTLLM - # ): - # return - - # output_tokens_mean = 100 - # output_tokens_stddev = 0 - # for deterministic in [True, False]: - # _ = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=output_format, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # output_tokens_mean=output_tokens_mean, - # output_tokens_stddev=output_tokens_stddev, - # output_tokens_deterministic=deterministic, - # model_name=["test_model_A"], - # ) - - # assert os.path.exists( - # DEFAULT_INPUT_DATA_JSON - # ), "llm_inputs.json file is not created" - - # with open(DEFAULT_INPUT_DATA_JSON, "r") as f: - # llm_inputs_data = json.load(f) - - # for entry in llm_inputs_data["data"]: - # if output_format == OutputFormat.VLLM: - # assert ( - # "sampling_parameters" in entry - # ), "sampling_parameters is missing in llm_inputs.json" - # sampling_parameters = json.loads(entry["sampling_parameters"][0]) - # assert ( - # "max_tokens" in sampling_parameters - # ), "max_tokens parameter is missing in sampling_parameters" - # assert sampling_parameters["max_tokens"] == str( - # output_tokens_mean - # ), "max_tokens parameter is not properly set" - # if deterministic: - # assert ( - # "min_tokens" in sampling_parameters - # ), "min_tokens parameter is missing in sampling_parameters" - # assert sampling_parameters["min_tokens"] == str( - # output_tokens_mean - # ), "min_tokens parameter is not properly set" - # else: - # assert ( - # "min_tokens" not in sampling_parameters - # ), "min_tokens parameter is present in sampling_parameters" - # elif output_format == OutputFormat.TENSORRTLLM: - # assert ( - # "max_tokens" in entry - # ), "max_tokens parameter is missing in llm_inputs.json" - # assert ( - # entry["max_tokens"][0] == output_tokens_mean - # ), "max_tokens parameter is not properly set" - # if deterministic: - # assert ( - # "min_length" in entry - # ), "min_length parameter is missing in llm_inputs.json" - # assert ( - # entry["min_length"][0] == output_tokens_mean - # ), "min_length parameter is not properly set" - # else: - # assert ( - # "min_length" not in entry - # ), "min_length parameter is present in llm_inputs.json" - # else: - # assert False, f"Unsupported output format: {output_format}" - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - def test_get_input_file_without_file_existing(self): - with pytest.raises(FileNotFoundError): - LlmInputs._get_input_dataset_from_file(Path("prompt.txt")) - - @patch("pathlib.Path.exists", return_value=True) - @patch( - "builtins.open", - new_callable=mock_open, - read_data='{"text_input": "single prompt"}\n', - ) - def test_get_input_file_with_single_prompt(self, mock_file, mock_exists): - expected_prompts = ["single prompt"] - dataset = LlmInputs._get_input_dataset_from_file(Path("prompt.txt")) - - assert dataset is not None - assert len(dataset["rows"]) == len(expected_prompts) - for i, prompt in enumerate(expected_prompts): - assert dataset["rows"][i]["row"]["text_input"] == prompt - - @patch("pathlib.Path.exists", return_value=True) - @patch( - "builtins.open", - new_callable=mock_open, - read_data='{"text_input": "prompt1"}\n{"text_input": "prompt2"}\n{"text_input": "prompt3"}\n', - ) - def test_get_input_file_with_multiple_prompts(self, mock_file, mock_exists): - expected_prompts = ["prompt1", "prompt2", "prompt3"] - dataset = LlmInputs._get_input_dataset_from_file(Path("prompt.txt")) - - assert dataset is not None - assert 
len(dataset["rows"]) == len(expected_prompts) - for i, prompt in enumerate(expected_prompts): - assert dataset["rows"][i]["row"]["text_input"] == prompt - - @patch("pathlib.Path.exists", return_value=True) - @patch("PIL.Image.open", return_value=Image.new("RGB", (10, 10))) - @patch( - "builtins.open", - new_callable=mock_open, - read_data=( - '{"text_input": "prompt1", "image": "image1.png"}\n' - '{"text_input": "prompt2", "image": "image2.png"}\n' - '{"text_input": "prompt3", "image": "image3.png"}\n' - ), - ) - def test_get_input_file_with_multi_modal_data( - self, mock_exists, mock_image, mock_file - ): - Data = namedtuple("Data", ["text_input", "image"]) - expected_data = [ - Data(text_input="prompt1", image="image1.png"), - Data(text_input="prompt2", image="image2.png"), - Data(text_input="prompt3", image="image3.png"), - ] - dataset = LlmInputs._get_input_dataset_from_file(Path("somefile.txt")) - - assert dataset is not None - assert len(dataset["rows"]) == len(expected_data) - for i, data in enumerate(expected_data): - assert dataset["rows"][i]["row"]["text_input"] == data.text_input - assert dataset["rows"][i]["row"]["image"] == data.image - - @pytest.mark.parametrize( - "seed, model_name_list, index,model_selection_strategy,expected_model", - [ - ( - 1, - ["test_model_A", "test_model_B", "test_model_C"], - 0, - ModelSelectionStrategy.ROUND_ROBIN, - "test_model_A", - ), - ( - 1, - ["test_model_A", "test_model_B", "test_model_C"], - 1, - ModelSelectionStrategy.ROUND_ROBIN, - "test_model_B", - ), - ( - 1, - ["test_model_A", "test_model_B", "test_model_C"], - 2, - ModelSelectionStrategy.ROUND_ROBIN, - "test_model_C", - ), - ( - 1, - ["test_model_A", "test_model_B", "test_model_C"], - 3, - ModelSelectionStrategy.ROUND_ROBIN, - "test_model_A", - ), - ( - 100, - ["test_model_A", "test_model_B", "test_model_C"], - 0, - ModelSelectionStrategy.RANDOM, - "test_model_A", - ), - ( - 100, - ["test_model_A", "test_model_B", "test_model_C"], - 1, - ModelSelectionStrategy.RANDOM, - "test_model_A", - ), - ( - 1652, - ["test_model_A", "test_model_B", "test_model_C"], - 0, - ModelSelectionStrategy.RANDOM, - "test_model_B", - ), - ( - 95, - ["test_model_A", "test_model_B", "test_model_C"], - 0, - ModelSelectionStrategy.RANDOM, - "test_model_C", - ), - ], - ) - def test_select_model_name( - self, seed, model_name_list, index, model_selection_strategy, expected_model - ): - """ - Test that model selection strategy controls the model selected - """ - random.seed(seed) - - actual_model = LlmInputs._select_model_name( - model_name_list, index, model_selection_strategy - ) - assert actual_model == expected_model diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs_embeddings.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs_embeddings.py deleted file mode 100644 index 0cefa38a7..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs_embeddings.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from pathlib import Path -from unittest.mock import mock_open, patch - -import pytest -from genai_perf.llm_inputs.llm_inputs import LlmInputs, ModelSelectionStrategy - - -class TestLlmInputsEmbeddings: - @patch("pathlib.Path.exists", return_value=True) - @patch( - "builtins.open", - new_callable=mock_open, - read_data="\n".join( - [ - '{"text": "What production company co-owned by Kevin Loader and Rodger Michell produced My Cousin Rachel?"}', - '{"text": "Who served as the 1st Vice President of Colombia under El Libertador?"}', - '{"text": "Are the Barton Mine and Hermiston-McCauley Mine located in The United States of America?"}', - '{"text": "what state did they film daddy\'s home 2"}', - ] - ), - ) - def test_get_input_dataset_from_embeddings_file(self, mock_file, mock_exists): - input_filename = Path("embeddings.jsonl") - batch_size = 3 - dataset = LlmInputs._get_input_dataset_from_embeddings_file( - input_filename, batch_size, num_prompts=100 - ) - - assert dataset is not None - assert len(dataset["rows"]) == 100 - for row in dataset["rows"]: - assert "row" in row - assert "payload" in row["row"] - payload = row["row"]["payload"] - assert "input" in payload - assert isinstance(payload["input"], list) - assert len(payload["input"]) == batch_size - - # Try error case where batch size is larger than the number of available texts - with pytest.raises( - ValueError, - match="Batch size cannot be larger than the number of available texts", - ): - LlmInputs._get_input_dataset_from_embeddings_file( - input_filename, 5, num_prompts=10 - ) - - def test_convert_generic_json_to_openai_embeddings_format(self): - generic_dataset = { - "rows": [ - {"payload": {"input": ["text 1", "text 2"]}}, - {"payload": {"input": ["text 3", "text 4"]}}, - ] - } - - expected_result = { - "data": [ - { - "payload": [ - { - "input": ["text 1", "text 2"], - "model": "test_model", - } - ] - }, - { - "payload": [ - { - "input": ["text 3", "text 4"], - "model": "test_model", - } - ] - }, - ] - } - - result = LlmInputs._convert_generic_json_to_openai_embeddings_format( - generic_dataset, - extra_inputs={}, - model_name=["test_model"], - model_selection_strategy=ModelSelectionStrategy.ROUND_ROBIN, - ) - - assert result is not None - assert "data" in result - assert len(result["data"]) == len(expected_result["data"]) - - for i, item in enumerate(expected_result["data"]): - assert "payload" in result["data"][i] - assert result["data"][i]["payload"] == item["payload"] - - def test_convert_generic_json_to_openai_embeddings_format_with_extra_inputs(self): - 
generic_dataset = { - "rows": [ - {"payload": {"input": ["text 1", "text 2"]}}, - {"payload": {"input": ["text 3", "text 4"]}}, - ] - } - - extra_inputs = { - "encoding_format": "base64", - "truncate": "END", - "additional_key": "additional_value", - } - - expected_result = { - "data": [ - { - "payload": [ - { - "input": ["text 1", "text 2"], - "model": "test_model", - "encoding_format": "base64", - "truncate": "END", - "additional_key": "additional_value", - } - ] - }, - { - "payload": [ - { - "input": ["text 3", "text 4"], - "model": "test_model", - "encoding_format": "base64", - "truncate": "END", - "additional_key": "additional_value", - } - ] - }, - ] - } - - result = LlmInputs._convert_generic_json_to_openai_embeddings_format( - generic_dataset, - extra_inputs=extra_inputs, - model_name=["test_model"], - model_selection_strategy=ModelSelectionStrategy.ROUND_ROBIN, - ) - - assert result is not None - assert "data" in result - assert len(result["data"]) == len(expected_result["data"]) - - for i, item in enumerate(expected_result["data"]): - assert "payload" in result["data"][i] - assert result["data"][i]["payload"] == item["payload"] diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs_rankings.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs_rankings.py deleted file mode 100644 index bfe2be482..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs_rankings.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from pathlib import Path -from unittest.mock import mock_open, patch - -import pytest -from genai_perf.llm_inputs.llm_inputs import LlmInputs, ModelSelectionStrategy - - -class TestLlmInputsRankings: - - def open_side_effects(filepath, *args, **kwargs): - queries_content = "\n".join( - [ - '{"text": "What production company co-owned by Kevin Loader and Rodger Michell produced My Cousin Rachel?"}', - '{"text": "Who served as the 1st Vice President of Colombia under El Libertador?"}', - '{"text": "Are the Barton Mine and Hermiston-McCauley Mine located in The United States of America?"}', - ] - ) - passages_content = "\n".join( - [ - '{"text": "Eric Anderson (sociologist) Eric Anderson (born January 18, 1968) is an American sociologist"}', - '{"text": "Kevin Loader is a British film and television producer. "}', - '{"text": "Barton Mine, also known as Net Lake Mine, is an abandoned surface and underground mine in Northeastern Ontario"}', - ] - ) - - file_contents = { - "queries.jsonl": queries_content, - "passages.jsonl": passages_content, - } - return mock_open( - read_data=file_contents.get(filepath, file_contents["queries.jsonl"]) - )() - - mock_open_obj = mock_open() - mock_open_obj.side_effect = open_side_effects - - @patch("pathlib.Path.exists", return_value=True) - @patch("builtins.open", mock_open_obj) - def test_get_input_dataset_from_rankings_file(self, mock_file): - queries_filename = Path("queries.jsonl") - passages_filename = Path("passages.jsonl") - batch_size = 2 - dataset = LlmInputs._get_input_dataset_from_rankings_files( - queries_filename, passages_filename, batch_size, num_prompts=100 - ) - - assert dataset is not None - assert len(dataset["rows"]) == 100 - for row in dataset["rows"]: - assert "row" in row - assert "payload" in row["row"] - payload = row["row"]["payload"] - assert "query" in payload - assert "passages" in payload - assert isinstance(payload["passages"], list) - assert len(payload["passages"]) == batch_size - - # Try error case where batch size is larger than the number of available texts - with pytest.raises( - ValueError, - match="Batch size cannot be larger than the number of available passages", - ): - LlmInputs._get_input_dataset_from_rankings_files( - queries_filename, passages_filename, 5, num_prompts=10 - ) - - def test_convert_generic_json_to_openai_rankings_format(self): - generic_dataset = { - "rows": [ - { - "payload": { - "query": {"text": "1"}, - "passages": [{"text": "2"}, {"text": "3"}, {"text": "4"}], - } - } - ] - } - - expected_result = { - "data": [ - { - "payload": [ - { - "query": {"text": "1"}, - "passages": [{"text": "2"}, {"text": "3"}, {"text": "4"}], - "model": "test_model", - } - ] - } - ] - } - - result = LlmInputs._convert_generic_json_to_rankings_format( - generic_dataset, - extra_inputs={}, - model_name=["test_model"], - model_selection_strategy=ModelSelectionStrategy.ROUND_ROBIN, - ) - - assert result is not None - assert "data" in result - assert len(result["data"]) == len(expected_result["data"]) - - for i, item in enumerate(expected_result["data"]): - assert "payload" in result["data"][i] - assert result["data"][i]["payload"] == item["payload"] - - def test_convert_generic_json_to_openai_rankings_format_with_extra_inputs(self): - generic_dataset = { - "rows": [ - { - "payload": { - "query": {"text": "1"}, - "passages": [{"text": "2"}, {"text": "3"}, {"text": "4"}], - } - } - ] - } - - extra_inputs = { - "encoding_format": "base64", - "truncate": "END", - "additional_key": "additional_value", - } - - expected_result = 
{ - "data": [ - { - "payload": [ - { - "query": {"text": "1"}, - "passages": [{"text": "2"}, {"text": "3"}, {"text": "4"}], - "model": "test_model", - "encoding_format": "base64", - "truncate": "END", - "additional_key": "additional_value", - } - ] - } - ] - } - - result = LlmInputs._convert_generic_json_to_rankings_format( - generic_dataset, - extra_inputs=extra_inputs, - model_name=["test_model"], - model_selection_strategy=ModelSelectionStrategy.ROUND_ROBIN, - ) - - assert result is not None - assert "data" in result - assert len(result["data"]) == len(expected_result["data"]) - - for i, item in enumerate(expected_result["data"]): - assert "payload" in result["data"][i] - assert result["data"][i]["payload"] == item["payload"] diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_metrics.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_metrics.py deleted file mode 100644 index 689e366cd..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_metrics.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import pytest -from genai_perf.metrics import LLMMetrics - - -class TestLLMMetrics: - - def test_llm_metric_request_metrics(self) -> None: - """Test request_metrics property.""" - m = LLMMetrics( - request_throughputs=[10.12, 11.33], - request_latencies=[3, 44], - time_to_first_tokens=[1, 2, 3], - inter_token_latencies=[4, 5], - output_token_throughputs=[22.13, 9423.02], - output_token_throughputs_per_request=[7, 8, 9], - output_sequence_lengths=[3, 4], - input_sequence_lengths=[12, 34], - ) - req_metrics = m.request_metrics - assert len(req_metrics) == 6 - assert req_metrics[0].name == "time_to_first_token" - assert req_metrics[0].unit == "ms" - assert req_metrics[1].name == "inter_token_latency" - assert req_metrics[1].unit == "ms" - assert req_metrics[2].name == "request_latency" - assert req_metrics[2].unit == "ms" - assert req_metrics[3].name == "output_token_throughput_per_request" - assert req_metrics[3].unit == "tokens/sec" - assert req_metrics[4].name == "output_sequence_length" - assert req_metrics[4].unit == "tokens" - assert req_metrics[5].name == "input_sequence_length" - assert req_metrics[5].unit == "tokens" - - def test_llm_metric_system_metrics(self) -> None: - """Test system_metrics property.""" - m = LLMMetrics( - request_throughputs=[10.12, 11.33], - request_latencies=[3, 44], - time_to_first_tokens=[1, 2, 3], - inter_token_latencies=[4, 5], - output_token_throughputs=[22.13, 9423.02], - output_token_throughputs_per_request=[7, 8, 9], - output_sequence_lengths=[3, 4], - input_sequence_lengths=[12, 34], - ) - - sys_metrics = m.system_metrics - assert len(sys_metrics) == 2 - assert sys_metrics[0].name == "output_token_throughput" - assert sys_metrics[0].unit == "per sec" - assert sys_metrics[1].name == "request_throughput" - assert sys_metrics[1].unit == "per sec" - - def test_llm_metrics_get_base_name(self) -> None: - """Test get_base_name method in LLMMetrics class.""" - # initialize with dummy values - metrics = LLMMetrics( - request_throughputs=[10.12, 11.33], - request_latencies=[3, 44], - time_to_first_tokens=[1, 2, 3], - inter_token_latencies=[4, 5], - output_token_throughputs=[22.13, 9423.02], - output_token_throughputs_per_request=[7, 8, 9], - output_sequence_lengths=[3, 4], - input_sequence_lengths=[12, 34], - ) - assert metrics.get_base_name("time_to_first_tokens") == "time_to_first_token" - assert metrics.get_base_name("inter_token_latencies") == "inter_token_latency" - assert ( - metrics.get_base_name("output_token_throughputs_per_request") - == "output_token_throughput_per_request" - ) - assert ( - metrics.get_base_name("output_sequence_lengths") == "output_sequence_length" - ) - assert ( - metrics.get_base_name("input_sequence_lengths") == "input_sequence_length" - ) - with pytest.raises(KeyError): - metrics.get_base_name("hello1234") diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_profile_data_parser.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_profile_data_parser.py deleted file mode 100644 index d776a6a85..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_profile_data_parser.py +++ /dev/null @@ -1,742 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json -from io import StringIO -from pathlib import Path -from typing import Any, List, Union - -import numpy as np -import pytest -from genai_perf.metrics import LLMMetrics -from genai_perf.profile_data_parser import LLMProfileDataParser -from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer - - -def ns_to_sec(ns: int) -> Union[int, float]: - """Convert from nanosecond to second.""" - return ns / 1e9 - - -class TestLLMProfileDataParser: - @pytest.fixture - def mock_read_write(self, monkeypatch: pytest.MonkeyPatch) -> List[str]: - """ - This function will mock the open function for specific files: - - - For "triton_profile_export.json", it will read and return the - contents of self.triton_profile_data - - For "openai_profile_export.json", it will read and return the - contents of self.openai_profile_data - - For "profile_export.csv", it will capture all data written to - the file, and return it as the return value of this function - - For all other files, it will behave like the normal open function - """ - - written_data = [] - - original_open = open - - def custom_open(filename, *args, **kwargs): - def write(self: Any, content: str) -> int: - written_data.append(content) - return len(content) - - if filename == "triton_profile_export.json": - tmp_file = StringIO(json.dumps(self.triton_profile_data)) - return tmp_file - elif filename == "openai_profile_export.json": - tmp_file = StringIO(json.dumps(self.openai_profile_data)) - return tmp_file - elif filename == "openai_vlm_profile_export.json": - tmp_file = StringIO(json.dumps(self.openai_vlm_profile_data)) - return tmp_file - elif filename == "empty_profile_export.json": - tmp_file = StringIO(json.dumps(self.empty_profile_data)) - return tmp_file - elif filename == "profile_export.csv": - tmp_file = StringIO() - tmp_file.write = write.__get__(tmp_file) - return tmp_file - else: - return original_open(filename, *args, **kwargs) - - monkeypatch.setattr("builtins.open", custom_open) - - return written_data - - def test_triton_llm_profile_data(self, mock_read_write: pytest.MonkeyPatch) -> None: - """Collect LLM metrics from profile export data and check values. 
- - Metrics - * time to first tokens - - experiment 1: [3 - 1, 4 - 2] = [2, 2] - - experiment 2: [7 - 5, 6 - 3] = [2, 3] - * inter token latencies - - experiment 1: [((8 - 1) - 2)/(3 - 1), ((11 - 2) - 2)/(6 - 1)] - : [2.5, 1.4] - : [2, 1] # rounded - - experiment 2: [((18 - 5) - 2)/(4 - 1), ((11 - 3) - 3)/(6 - 1)] - : [11/3, 1] - : [4, 1] # rounded - * output token throughputs per request - - experiment 1: [3/(8 - 1), 6/(11 - 2)] = [3/7, 6/9] - - experiment 2: [4/(18 - 5), 6/(11 - 3)] = [4/13, 6/8] - * output token throughputs - - experiment 1: [(3 + 6)/(11 - 1)] = [9/10] - - experiment 2: [(4 + 6)/(18 - 3)] = [2/3] - * output sequence lengths - - experiment 1: [3, 6] - - experiment 2: [4, 6] - * input sequence lengths - - experiment 1: [3, 4] - - experiment 2: [3, 4] - """ - tokenizer = get_tokenizer(DEFAULT_TOKENIZER) - pd = LLMProfileDataParser( - filename=Path("triton_profile_export.json"), - tokenizer=tokenizer, - ) - - # experiment 1 metrics & statistics - stat_obj = pd.get_statistics(infer_mode="concurrency", load_level="10") - metrics = stat_obj.metrics - stat = stat_obj.stats_dict - - assert isinstance(metrics, LLMMetrics) - - assert metrics.time_to_first_tokens == [2, 2] - assert metrics.inter_token_latencies == [2, 1] - ottpr = [3 / ns_to_sec(7), 6 / ns_to_sec(9)] - assert metrics.output_token_throughputs_per_request == pytest.approx(ottpr) - ott = [9 / ns_to_sec(10)] - assert metrics.output_token_throughputs == pytest.approx(ott) - assert metrics.output_sequence_lengths == [3, 6] - assert metrics.input_sequence_lengths == [3, 4] - - # Disable Pylance warnings for dynamically set attributes due to Statistics - # not having strict attributes listed. - assert stat["time_to_first_token"]["avg"] == 2 # type: ignore - assert stat["inter_token_latency"]["avg"] == 1.5 # type: ignore - assert stat["output_token_throughput_per_request"]["avg"] == pytest.approx( # type: ignore - np.mean(ottpr) - ) - assert stat["output_sequence_length"]["avg"] == 4.5 # type: ignore - assert stat["input_sequence_length"]["avg"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["p50"] == 2 # type: ignore - assert stat["inter_token_latency"]["p50"] == 1.5 # type: ignore - assert stat["output_token_throughput_per_request"]["p50"] == pytest.approx( # type: ignore - np.percentile(ottpr, 50) - ) - assert stat["output_sequence_length"]["p50"] == 4.5 # type: ignore - assert stat["input_sequence_length"]["p50"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["min"] == 2 # type: ignore - assert stat["inter_token_latency"]["min"] == 1 # type: ignore - min_ottpr = 3 / ns_to_sec(7) - assert stat["output_token_throughput_per_request"]["min"] == pytest.approx(min_ottpr) # type: ignore - assert stat["output_sequence_length"]["min"] == 3 # type: ignore - assert stat["input_sequence_length"]["min"] == 3 # type: ignore - - assert stat["time_to_first_token"]["max"] == 2 # type: ignore - assert stat["inter_token_latency"]["max"] == 2 # type: ignore - max_ottpr = 6 / ns_to_sec(9) - assert stat["output_token_throughput_per_request"]["max"] == pytest.approx(max_ottpr) # type: ignore - assert stat["output_sequence_length"]["max"] == 6 # type: ignore - assert stat["input_sequence_length"]["max"] == 4 # type: ignore - - assert stat["time_to_first_token"]["std"] == np.std([2, 2]) # type: ignore - assert stat["inter_token_latency"]["std"] == np.std([2, 1]) # type: ignore - assert stat["output_token_throughput_per_request"]["std"] == pytest.approx( # type: ignore - np.std(ottpr) - ) - assert 
stat["output_sequence_length"]["std"] == np.std([3, 6]) # type: ignore - assert stat["input_sequence_length"]["std"] == np.std([3, 4]) # type: ignore - - oott = 9 / ns_to_sec(10) - assert stat["output_token_throughput"]["avg"] == pytest.approx(oott) # type: ignore - - # experiment 2 statistics - stat_obj = pd.get_statistics(infer_mode="request_rate", load_level="2.0") - metrics = stat_obj.metrics - stat = stat_obj.stats_dict - assert isinstance(metrics, LLMMetrics) - - assert metrics.time_to_first_tokens == [2, 3] - assert metrics.inter_token_latencies == [4, 1] - ottpr = [4 / ns_to_sec(13), 6 / ns_to_sec(8)] - assert metrics.output_token_throughputs_per_request == pytest.approx(ottpr) - ott = [2 / ns_to_sec(3)] - assert metrics.output_token_throughputs == pytest.approx(ott) - assert metrics.output_sequence_lengths == [4, 6] - assert metrics.input_sequence_lengths == [3, 4] - - assert stat["time_to_first_token"]["avg"] == pytest.approx(2.5) # type: ignore - assert stat["inter_token_latency"]["avg"] == pytest.approx(2.5) # type: ignore - assert stat["output_token_throughput_per_request"]["avg"] == pytest.approx( # type: ignore - np.mean(ottpr) - ) - assert stat["output_sequence_length"]["avg"] == 5 # type: ignore - assert stat["input_sequence_length"]["avg"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["p50"] == pytest.approx(2.5) # type: ignore - assert stat["inter_token_latency"]["p50"] == pytest.approx(2.5) # type: ignore - assert stat["output_token_throughput_per_request"]["p50"] == pytest.approx( # type: ignore - np.percentile(ottpr, 50) - ) - assert stat["output_sequence_length"]["p50"] == 5 # type: ignore - assert stat["input_sequence_length"]["p50"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["min"] == pytest.approx(2) # type: ignore - assert stat["inter_token_latency"]["min"] == pytest.approx(1) # type: ignore - min_ottpr = 4 / ns_to_sec(13) - assert stat["output_token_throughput_per_request"]["min"] == pytest.approx(min_ottpr) # type: ignore - assert stat["output_sequence_length"]["min"] == 4 # type: ignore - assert stat["input_sequence_length"]["min"] == 3 # type: ignore - - assert stat["time_to_first_token"]["max"] == pytest.approx(3) # type: ignore - assert stat["inter_token_latency"]["max"] == pytest.approx(4) # type: ignore - max_ottpr = 6 / ns_to_sec(8) - assert stat["output_token_throughput_per_request"]["max"] == pytest.approx(max_ottpr) # type: ignore - assert stat["output_sequence_length"]["max"] == 6 # type: ignore - assert stat["input_sequence_length"]["max"] == 4 # type: ignore - - assert stat["time_to_first_token"]["std"] == np.std([2, 3]) * (1) # type: ignore - assert stat["inter_token_latency"]["std"] == np.std([4, 1]) * (1) # type: ignore - assert stat["output_token_throughput_per_request"]["std"] == pytest.approx( # type: ignore - np.std(ottpr) - ) - assert stat["output_sequence_length"]["std"] == np.std([4, 6]) # type: ignore - assert stat["input_sequence_length"]["std"] == np.std([3, 4]) # type: ignore - - oott = 2 / ns_to_sec(3) - assert stat["output_token_throughput"]["avg"] == pytest.approx(oott) # type: ignore - - # check non-existing profile data - with pytest.raises(KeyError): - pd.get_statistics(infer_mode="concurrency", load_level="30") - - def test_openai_llm_profile_data(self, mock_read_write: pytest.MonkeyPatch) -> None: - """Collect LLM metrics from profile export data and check values. 
- - Metrics - * time to first tokens - - experiment 1: [5 - 1, 7 - 2] = [4, 5] - * inter token latencies - - experiment 1: [((12 - 1) - 4)/(3 - 1), ((15 - 2) - 5)/(6 - 1)] - : [3.5, 1.6] - : [4, 2] # rounded - * output token throughputs per request - - experiment 1: [3/(12 - 1), 6/(15 - 2)] = [3/11, 6/13] - * output token throughputs - - experiment 1: [(3 + 6)/(15 - 1)] = [9/14] - * output sequence lengths - - experiment 1: [3, 6] - * input sequence lengths - - experiment 1: [3, 4] - """ - tokenizer = get_tokenizer(DEFAULT_TOKENIZER) - pd = LLMProfileDataParser( - filename=Path("openai_profile_export.json"), - tokenizer=tokenizer, - ) - - # experiment 1 statistics - stat_obj = pd.get_statistics(infer_mode="concurrency", load_level="10") - metrics = stat_obj.metrics - stat = stat_obj.stats_dict - assert isinstance(metrics, LLMMetrics) - - assert metrics.time_to_first_tokens == [4, 5] - assert metrics.inter_token_latencies == [4, 2] - ottpr = [3 / ns_to_sec(11), 6 / ns_to_sec(13)] - assert metrics.output_token_throughputs_per_request == pytest.approx(ottpr) - ott = [9 / ns_to_sec(14)] - assert metrics.output_token_throughputs == pytest.approx(ott) - assert metrics.output_sequence_lengths == [3, 6] - assert metrics.input_sequence_lengths == [3, 4] - - assert stat["time_to_first_token"]["avg"] == pytest.approx(4.5) # type: ignore - assert stat["inter_token_latency"]["avg"] == pytest.approx(3) # type: ignore - assert stat["output_token_throughput_per_request"]["avg"] == pytest.approx( # type: ignore - np.mean(ottpr) - ) - assert stat["output_sequence_length"]["avg"] == 4.5 # type: ignore - assert stat["input_sequence_length"]["avg"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["p50"] == pytest.approx(4.5) # type: ignore - assert stat["inter_token_latency"]["p50"] == pytest.approx(3) # type: ignore - assert stat["output_token_throughput_per_request"]["p50"] == pytest.approx( # type: ignore - np.percentile(ottpr, 50) - ) - assert stat["output_sequence_length"]["p50"] == 4.5 # type: ignore - assert stat["input_sequence_length"]["p50"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["min"] == pytest.approx(4) # type: ignore - assert stat["inter_token_latency"]["min"] == pytest.approx(2) # type: ignore - min_ottpr = 3 / ns_to_sec(11) - assert stat["output_token_throughput_per_request"]["min"] == pytest.approx(min_ottpr) # type: ignore - assert stat["output_sequence_length"]["min"] == 3 # type: ignore - assert stat["input_sequence_length"]["min"] == 3 # type: ignore - - assert stat["time_to_first_token"]["max"] == pytest.approx(5) # type: ignore - assert stat["inter_token_latency"]["max"] == pytest.approx(4) # type: ignore - max_ottpr = 6 / ns_to_sec(13) - assert stat["output_token_throughput_per_request"]["max"] == pytest.approx(max_ottpr) # type: ignore - assert stat["output_sequence_length"]["max"] == 6 # type: ignore - assert stat["input_sequence_length"]["max"] == 4 # type: ignore - - assert stat["time_to_first_token"]["std"] == np.std([4, 5]) * (1) # type: ignore - assert stat["inter_token_latency"]["std"] == np.std([4, 2]) * (1) # type: ignore - assert stat["output_token_throughput_per_request"]["std"] == pytest.approx( # type: ignore - np.std(ottpr) - ) - assert stat["output_sequence_length"]["std"] == np.std([3, 6]) # type: ignore - assert stat["input_sequence_length"]["std"] == np.std([3, 4]) # type: ignore - - oott = 9 / ns_to_sec(14) - assert stat["output_token_throughput"]["avg"] == pytest.approx(oott) # type: ignore - - # check non-existing profile data - 
with pytest.raises(KeyError): - pd.get_statistics(infer_mode="concurrency", load_level="40") - - def test_openai_vlm_profile_data(self, mock_read_write: pytest.MonkeyPatch) -> None: - """Collect LLM metrics from profile export data and check values. - - Metrics - * time to first tokens - - experiment 1: [5 - 1, 7 - 2] = [4, 5] - * inter token latencies - - experiment 1: [((12 - 1) - 4)/(3 - 1), ((15 - 2) - 5)/(6 - 1)] - : [3.5, 1.6] - : [4, 2] # rounded - * output token throughputs per request - - experiment 1: [3/(12 - 1), 6/(15 - 2)] = [3/11, 6/13] - * output token throughputs - - experiment 1: [(3 + 6)/(15 - 1)] = [9/14] - * output sequence lengths - - experiment 1: [3, 6] - * input sequence lengths - - experiment 1: [3, 4] - """ - tokenizer = get_tokenizer(DEFAULT_TOKENIZER) - pd = LLMProfileDataParser( - filename=Path("openai_vlm_profile_export.json"), - tokenizer=tokenizer, - ) - - # experiment 1 statistics - stat_obj = pd.get_statistics(infer_mode="concurrency", load_level="10") - metrics = stat_obj.metrics - stat = stat_obj.stats_dict - assert isinstance(metrics, LLMMetrics) - - assert metrics.time_to_first_tokens == [4, 5] - assert metrics.inter_token_latencies == [4, 2] - ottpr = [3 / ns_to_sec(11), 6 / ns_to_sec(13)] - assert metrics.output_token_throughputs_per_request == pytest.approx(ottpr) - ott = [9 / ns_to_sec(14)] - assert metrics.output_token_throughputs == pytest.approx(ott) - assert metrics.output_sequence_lengths == [3, 6] - assert metrics.input_sequence_lengths == [3, 4] - - assert stat["time_to_first_token"]["avg"] == pytest.approx(4.5) # type: ignore - assert stat["inter_token_latency"]["avg"] == pytest.approx(3) # type: ignore - assert stat["output_token_throughput_per_request"]["avg"] == pytest.approx( # type: ignore - np.mean(ottpr) - ) - assert stat["output_sequence_length"]["avg"] == 4.5 # type: ignore - assert stat["input_sequence_length"]["avg"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["p50"] == pytest.approx(4.5) # type: ignore - assert stat["inter_token_latency"]["p50"] == pytest.approx(3) # type: ignore - assert stat["output_token_throughput_per_request"]["p50"] == pytest.approx( # type: ignore - np.percentile(ottpr, 50) - ) - assert stat["output_sequence_length"]["p50"] == 4.5 # type: ignore - assert stat["input_sequence_length"]["p50"] == 3.5 # type: ignore - - assert stat["time_to_first_token"]["min"] == pytest.approx(4) # type: ignore - assert stat["inter_token_latency"]["min"] == pytest.approx(2) # type: ignore - min_ottpr = 3 / ns_to_sec(11) - assert stat["output_token_throughput_per_request"]["min"] == pytest.approx(min_ottpr) # type: ignore - assert stat["output_sequence_length"]["min"] == 3 # type: ignore - assert stat["input_sequence_length"]["min"] == 3 # type: ignore - - assert stat["time_to_first_token"]["max"] == pytest.approx(5) # type: ignore - assert stat["inter_token_latency"]["max"] == pytest.approx(4) # type: ignore - max_ottpr = 6 / ns_to_sec(13) - assert stat["output_token_throughput_per_request"]["max"] == pytest.approx(max_ottpr) # type: ignore - assert stat["output_sequence_length"]["max"] == 6 # type: ignore - assert stat["input_sequence_length"]["max"] == 4 # type: ignore - - assert stat["time_to_first_token"]["std"] == np.std([4, 5]) * (1) # type: ignore - assert stat["inter_token_latency"]["std"] == np.std([4, 2]) * (1) # type: ignore - assert stat["output_token_throughput_per_request"]["std"] == pytest.approx( # type: ignore - np.std(ottpr) - ) - assert stat["output_sequence_length"]["std"] == np.std([3, 
6]) # type: ignore - assert stat["input_sequence_length"]["std"] == np.std([3, 4]) # type: ignore - - oott = 9 / ns_to_sec(14) - assert stat["output_token_throughput"]["avg"] == pytest.approx(oott) # type: ignore - - # check non-existing profile data - with pytest.raises(KeyError): - pd.get_statistics(infer_mode="concurrency", load_level="40") - - def test_merged_sse_response(self, mock_read_write: pytest.MonkeyPatch) -> None: - """Test merging the multiple sse response.""" - res_timestamps = [0, 1, 2, 3] - res_outputs = [ - { - "response": 'data: {"choices":[{"delta":{"content":"aaa"}}],"object":"chat.completion.chunk"}\n\n' - }, - { - "response": ( - 'data: {"choices":[{"delta":{"content":"abc"}}],"object":"chat.completion.chunk"}\n\n' - 'data: {"choices":[{"delta":{"content":"1234"}}],"object":"chat.completion.chunk"}\n\n' - 'data: {"choices":[{"delta":{"content":"helloworld"}}],"object":"chat.completion.chunk"}\n\n' - ) - }, - {"response": "data: [DONE]\n\n"}, - ] - expected_response = '{"choices": [{"delta": {"content": "abc1234helloworld"}}], "object": "chat.completion.chunk"}' - - tokenizer = get_tokenizer(DEFAULT_TOKENIZER) - pd = LLMProfileDataParser( - filename=Path("openai_profile_export.json"), - tokenizer=tokenizer, - ) - - pd._preprocess_response(res_timestamps, res_outputs) - assert res_outputs[1]["response"] == expected_response - - def test_openai_output_token_counts( - self, mock_read_write: pytest.MonkeyPatch - ) -> None: - output_texts = [ - "Ad", - "idas", - " Orig", - "inals", - " are", - " now", - " available", - " in", - " more", - " than", - ] - res_outputs = [] - for text in output_texts: - response = f'data: {{"choices":[{{"delta":{{"content":"{text}"}}}}],"object":"chat.completion.chunk"}}\n\n' - res_outputs.append({"response": response}) - - tokenizer = get_tokenizer(DEFAULT_TOKENIZER) - pd = LLMProfileDataParser( - filename=Path("openai_profile_export.json"), - tokenizer=tokenizer, - ) - - output_token_counts, total_output_token = pd._get_output_token_counts( - res_outputs - ) - assert output_token_counts == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] # total 10 - assert total_output_token == 9 - assert total_output_token != sum(output_token_counts) - - def test_triton_output_token_counts( - self, mock_read_write: pytest.MonkeyPatch - ) -> None: - output_texts = [ - "Ad", - "idas", - " Orig", - "inals", - " are", - " now", - " available", - " in", - " more", - " than", - ] - res_outputs = [] - for text in output_texts: - res_outputs.append({"text_output": text}) - - tokenizer = get_tokenizer(DEFAULT_TOKENIZER) - pd = LLMProfileDataParser( - filename=Path("triton_profile_export.json"), - tokenizer=tokenizer, - ) - - output_token_counts, total_output_token = pd._get_output_token_counts( - res_outputs - ) - assert output_token_counts == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] # total 10 - assert total_output_token == 9 - assert total_output_token != sum(output_token_counts) - - def test_empty_response(self, mock_read_write: pytest.MonkeyPatch) -> None: - """Check if it handles all empty responses.""" - tokenizer = get_tokenizer(DEFAULT_TOKENIZER) - - # Should not throw error - _ = LLMProfileDataParser( - filename=Path("empty_profile_export.json"), - tokenizer=tokenizer, - ) - - empty_profile_data = { - "service_kind": "openai", - "endpoint": "v1/chat/completions", - "experiments": [ - { - "experiment": { - "mode": "concurrency", - "value": 10, - }, - "requests": [ - { - "timestamp": 1, - "request_inputs": { - "payload": '{"messages":[{"role":"user","content":"This is 
test"}],"model":"llama-2-7b","stream":true}', - }, - "response_timestamps": [3, 5, 8], - "response_outputs": [ - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":""},"finish_reason":null}]}\n\n' - }, - {"response": "data: [DONE]\n\n"}, - ], - }, - ], - }, - ], - } - - openai_profile_data = { - "service_kind": "openai", - "endpoint": "v1/chat/completions", - "experiments": [ - { - "experiment": { - "mode": "concurrency", - "value": 10, - }, - "requests": [ - { - "timestamp": 1, - "request_inputs": { - "payload": '{"messages":[{"role":"user","content":"This is test"}],"model":"llama-2-7b","stream":true}', - }, - # the first, and the last two responses will be ignored because they have no "content" - "response_timestamps": [3, 5, 8, 12, 13, 14], - "response_outputs": [ - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":"I"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":" like"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":" dogs"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{},"finish_reason":null}]}\n\n' - }, - {"response": "data: [DONE]\n\n"}, - ], - }, - { - "timestamp": 2, - "request_inputs": { - "payload": '{"messages":[{"role":"user","content":"This is test too"}],"model":"llama-2-7b","stream":true}', - }, - # the first, and the last two responses will be ignored because they have no "content" - "response_timestamps": [4, 7, 11, 15, 18, 19], - "response_outputs": [ - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":"I"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":"don\'t"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":"cook food"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{},"finish_reason":null}]}\n\n' - }, - {"response": "data: [DONE]\n\n"}, - ], - }, - ], - }, - ], - } - - openai_vlm_profile_data = { - "service_kind": "openai", - "endpoint": "v1/chat/completions", - "experiments": [ - { - "experiment": { - "mode": "concurrency", - "value": 10, - }, - "requests": [ - { - 
"timestamp": 1, - "request_inputs": { - "payload": '{"messages":[{"role":"user","content":[{"type":"text","text":"This is test"},{"type":"image_url","image_url":{"url":"data:image/png;base64,abcdef"}}]}],"model":"llava-1.6","stream":true}', - }, - # the first, and the last two responses will be ignored because they have no "content" - "response_timestamps": [3, 5, 8, 12, 13, 14], - "response_outputs": [ - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"I"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" like"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" dogs"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":null}]}\n\n' - }, - {"response": "data: [DONE]\n\n"}, - ], - }, - { - "timestamp": 2, - "request_inputs": { - "payload": '{"messages":[{"role":"user","content":[{"type":"text","text":"This is test too"},{"type":"image_url","image_url":{"url":"data:image/png;base64,abcdef"}}]}],"model":"llava-1.6","stream":true}', - }, - # the first, and the last two responses will be ignored because they have no "content" - "response_timestamps": [4, 7, 11, 15, 18, 19], - "response_outputs": [ - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"I"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"don\'t"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"cook food"},"finish_reason":null}]}\n\n' - }, - { - "response": 'data: {"id":"abc","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":null}]}\n\n' - }, - {"response": "data: [DONE]\n\n"}, - ], - }, - ], - }, - ], - } - - triton_profile_data = { - "service_kind": "triton", - "endpoint": "", - "experiments": [ - { - "experiment": { - "mode": "concurrency", - "value": 10, - }, - "requests": [ - { - "timestamp": 1, - "request_inputs": {"text_input": "This is test"}, - "response_timestamps": [3, 5, 8], - "response_outputs": [ - {"text_output": "I"}, - {"text_output": " like"}, - {"text_output": " dogs"}, - ], - }, - { - "timestamp": 2, - "request_inputs": {"text_input": "This is test too"}, - "response_timestamps": [4, 7, 11], - "response_outputs": [ - {"text_output": "I"}, - {"text_output": " don't"}, - {"text_output": " cook food"}, - ], - }, - ], - }, - { - "experiment": { - "mode": "request_rate", - "value": 2.0, - }, - "requests": [ - { - "timestamp": 5, - "request_inputs": {"text_input": "This is test"}, - "response_timestamps": [7, 8, 13, 18], - "response_outputs": [ - {"text_output": "cat"}, - {"text_output": " is"}, - {"text_output": " cool"}, - {"text_output": " too"}, - ], - }, - { - "timestamp": 3, - "request_inputs": {"text_input": "This is test too"}, - "response_timestamps": [6, 8, 11], - 
"response_outputs": [ - {"text_output": "it's"}, - {"text_output": " very"}, - {"text_output": " simple work"}, - ], - }, - ], - }, - ], - } diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_metrics.py b/src/c++/perf_analyzer/genai-perf/tests/test_metrics.py deleted file mode 100644 index 2af489fc4..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_metrics.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import pytest -from genai_perf.metrics import Metrics - - -class TestMetrics: - - def test_metric_request_metrics(self) -> None: - """Test request_metrics property.""" - m = Metrics( - request_throughputs=[10.12, 11.33], - request_latencies=[3, 44], - ) - req_metrics = m.request_metrics - assert len(req_metrics) == 1 - assert req_metrics[0].name == "request_latency" - assert req_metrics[0].unit == "ms" - - def test_metric_system_metrics(self) -> None: - """Test system_metrics property.""" - m = Metrics( - request_throughputs=[10.12, 11.33], - request_latencies=[3, 44], - ) - sys_metrics = m.system_metrics - assert len(sys_metrics) == 1 - assert sys_metrics[0].name == "request_throughput" - assert sys_metrics[0].unit == "per sec" - - def test_metrics_get_base_name(self) -> None: - """Test get_base_name method in Metrics class.""" - metrics = Metrics( - request_throughputs=[10.12, 11.33], - request_latencies=[3, 44], - ) - assert metrics.get_base_name("request_throughputs") == "request_throughput" - assert metrics.get_base_name("request_latencies") == "request_latency" - with pytest.raises(KeyError): - metrics.get_base_name("hello1234") diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_plot_configs.py b/src/c++/perf_analyzer/genai-perf/tests/test_plot_configs.py deleted file mode 100644 index 8a1dfee7a..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_plot_configs.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from pathlib import Path - -# Skip type checking to avoid mypy error -# Issue: https://github.com/python/mypy/issues/10632 -import yaml # type: ignore -from genai_perf.plots.plot_config import PlotType -from genai_perf.plots.plot_config_parser import PlotConfigParser - - -class TestPlotConfigParser: - yaml_config = """ - plot1: - title: TTFT vs ITL - x_metric: time_to_first_tokens - y_metric: inter_token_latencies - x_label: TTFT (ms) - y_label: ITL (ms) - width: 1000 - height: 3000 - type: box - paths: - - run1/concurrency32.json - - run2/concurrency32.json - - run3/concurrency32.json - output: test_output_1 - - plot2: - title: Input Sequence Length vs Output Sequence Length - x_metric: input_sequence_lengths - y_metric: output_sequence_lengths - x_label: Input Sequence Length - y_label: Output Sequence Length - width: 1234 - height: 5678 - type: scatter - paths: - - run4/concurrency1.json - output: test_output_2 - """ - - def test_generate_configs(self, monkeypatch) -> None: - monkeypatch.setattr( - "genai_perf.plots.plot_config_parser.load_yaml", - lambda _: yaml.safe_load(self.yaml_config), - ) - monkeypatch.setattr(PlotConfigParser, "_get_statistics", lambda *_: {}) - monkeypatch.setattr(PlotConfigParser, "_get_metric", lambda *_: [1, 2, 3]) - - config_parser = PlotConfigParser(Path("test_config.yaml")) - plot_configs = config_parser.generate_configs() - - assert len(plot_configs) == 2 - pc1, pc2 = plot_configs - - # plot config 1 - assert pc1.title == "TTFT vs ITL" - assert pc1.x_label == "TTFT (ms)" - assert pc1.y_label == "ITL (ms)" - assert pc1.width == 1000 - assert pc1.height == 3000 - assert pc1.type == PlotType.BOX - assert pc1.output == Path("test_output_1") - - assert len(pc1.data) == 3 # profile run data - prd1, prd2, prd3 = pc1.data - assert prd1.name == "run1/concurrency32" - assert prd2.name == "run2/concurrency32" - assert prd3.name == "run3/concurrency32" - for prd in pc1.data: - assert prd.x_metric == [1, 2, 3] - assert prd.y_metric == [1, 2, 3] - - # plot config 2 - assert pc2.title == "Input Sequence Length vs Output 
Sequence Length" - assert pc2.x_label == "Input Sequence Length" - assert pc2.y_label == "Output Sequence Length" - assert pc2.width == 1234 - assert pc2.height == 5678 - assert pc2.type == PlotType.SCATTER - assert pc2.output == Path("test_output_2") - - assert len(pc2.data) == 1 # profile run data - prd = pc2.data[0] - assert prd.name == "run4/concurrency1" - assert prd.x_metric == [1, 2, 3] - assert prd.y_metric == [1, 2, 3] diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_profile_data_parser.py b/src/c++/perf_analyzer/genai-perf/tests/test_profile_data_parser.py deleted file mode 100644 index fe303c514..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_profile_data_parser.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
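The profile data parser tests that follow work the base request metrics out by hand in their docstrings: request latency is the final response timestamp minus the request timestamp, and request throughput is the number of completed requests divided by the wall-clock span of the experiment, with every timestamp recorded in nanoseconds. A minimal sketch of that arithmetic on the same toy timestamps, offered as an illustration only (it is not ProfileDataParser code):

import math

# Toy data mirroring the deleted tests: two requests sent at 1 ns and 2 ns,
# with their last responses arriving at 3 ns and 5 ns.
request_timestamps = [1, 2]
final_response_timestamps = [3, 5]

# Request latency: last response timestamp minus request timestamp.
latencies = [end - start for start, end in zip(request_timestamps, final_response_timestamps)]
assert latencies == [2, 3]

# Request throughput: completed requests over the experiment span, in seconds.
span_sec = (max(final_response_timestamps) - min(request_timestamps)) / 1e9
assert math.isclose(len(request_timestamps) / span_sec, 5e8)  # 2 requests / 4 ns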
- -import json -from io import StringIO -from pathlib import Path -from typing import Any, List, Union - -import numpy as np -import pytest -from genai_perf.metrics import Metrics -from genai_perf.profile_data_parser import ProfileDataParser - - -def ns_to_sec(ns: int) -> Union[int, float]: - """Convert from nanosecond to second.""" - return ns / 1e9 - - -class TestProfileDataParser: - @pytest.fixture - def mock_read_write(self, monkeypatch: pytest.MonkeyPatch) -> List[str]: - """ - This function will mock the open function for specific files: - - - For "triton_profile_export.json", it will read and return the - contents of self.triton_profile_data - - For "openai_profile_export.json", it will read and return the - contents of self.openai_profile_data - - For "profile_export.csv", it will capture all data written to - the file, and return it as the return value of this function - - For all other files, it will behave like the normal open function - """ - - written_data = [] - - original_open = open - - def custom_open(filename, *args, **kwargs): - def write(self: Any, content: str) -> int: - written_data.append(content) - return len(content) - - if filename == "embedding_profile_export.json": - tmp_file = StringIO(json.dumps(self.embedding_profile_data)) - return tmp_file - elif filename == "ranking_profile_export.json": - tmp_file = StringIO(json.dumps(self.ranking_profile_data)) - return tmp_file - elif filename == "huggingface_ranking_profile_export.json": - tmp_file = StringIO(json.dumps(self.huggingface_ranking_profile_data)) - return tmp_file - elif filename == "profile_export.csv": - tmp_file = StringIO() - tmp_file.write = write.__get__(tmp_file) - return tmp_file - else: - return original_open(filename, *args, **kwargs) - - monkeypatch.setattr("builtins.open", custom_open) - - return written_data - - # ================================================ - # EMBEDDINGS API - # ================================================ - embedding_profile_data = { - "service_kind": "openai", - "endpoint": "v1/embeddings", - "experiments": [ - { - "experiment": { - "mode": "concurrency", - "value": 10, - }, - "requests": [ - { - "timestamp": 1, - "request_inputs": { - "payload": '{"input":"This is test","model":"NV-Embed-QA","input_type":"passage","encoding_format":"float","truncate":"NONE"}', - }, - "response_timestamps": [3], - "response_outputs": [ - { - "response": '{"object":"list","data":[{"index":0,"embedding":[1, 2, 3],"object":"embedding"}],"model":"NV-Embed-QA","usage":{"prompt_tokens":7,"total_tokens":7}}' - }, - ], - }, - { - "timestamp": 2, - "request_inputs": { - "payload": '{"input":"This is test too","model":"NV-Embed-QA","input_type":"passage","encoding_format":"float","truncate":"NONE"}', - }, - "response_timestamps": [5], - "response_outputs": [ - { - "response": '{"object":"list","data":[{"index":0,"embedding":[1, 2, 3, 4],"object":"embedding"}],"model":"NV-Embed-QA","usage":{"prompt_tokens":8,"total_tokens":8}}' - }, - ], - }, - ], - }, - ], - } - - def test_embedding_profile_data(self, mock_read_write: pytest.MonkeyPatch) -> None: - """Collect base metrics from profile export data and check values. 
- - Metrics - * request latencies - - [3 - 1, 5 - 2] = [2, 3] - * request throughputs - - [2 / (5e-9 - 1e-9)] = [5e8] - """ - pd = ProfileDataParser(filename=Path("embedding_profile_export.json")) - - # experiment 1 statistics - stats = pd.get_statistics(infer_mode="concurrency", load_level="10") - metrics = stats.metrics - stats_dict = stats.stats_dict - assert isinstance(metrics, Metrics) - - assert metrics.request_latencies == [2, 3] - assert metrics.request_throughputs == [pytest.approx(5e8)] - - assert stats_dict["request_latency"]["avg"] == pytest.approx(2.5) # type: ignore - assert stats_dict["request_latency"]["p50"] == pytest.approx(2.5) # type: ignore - assert stats_dict["request_latency"]["min"] == pytest.approx(2) # type: ignore - assert stats_dict["request_latency"]["max"] == pytest.approx(3) # type: ignore - assert stats_dict["request_latency"]["std"] == np.std([2, 3]) # type: ignore - - assert stats_dict["request_throughput"]["avg"] == pytest.approx(5e8) # type: ignore - - # ================================================ - # RANKINGS API - # ================================================ - ranking_profile_data = { - "service_kind": "openai", - "endpoint": "v1/ranking", - "experiments": [ - { - "experiment": { - "mode": "concurrency", - "value": 10, - }, - "requests": [ - { - "timestamp": 1, - "request_inputs": { - "payload": '{"query":{"text":"This is a test."},"passages":[{"text":"test output one"},{"text":"test output two"},{"text":"test output three"}],"model":"nv-rerank-qa-mistral-4b:1","truncate":"END"}', - }, - "response_timestamps": [3], - "response_outputs": [ - { - "response": '{"rankings":[{"index":0,"logit":-5.98828125},{"index":1,"logit":-6.828125},{"index":2,"logit":-7.60546875}]}' - }, - ], - }, - { - "timestamp": 2, - "request_inputs": { - "payload": '{"query":{"text":"This is a test."},"passages":[{"text":"test output one"},{"text":"test output two"},{"text":"test output three"}],"model":"nv-rerank-qa-mistral-4b:1","truncate":"END"}', - }, - "response_timestamps": [5], - "response_outputs": [ - { - "response": '{"rankings":[{"index":2,"logit":-6.15625},{"index":1,"logit":-7.83984375},{"index":0,"logit":-7.84765625}]}' - }, - ], - }, - ], - }, - ], - } - - def test_ranking_profile_data(self, mock_read_write: pytest.MonkeyPatch) -> None: - """Collect base metrics from profile export data and check values. 
- - Metrics - * request latencies - - [3 - 1, 5 - 2] = [2, 3] - * request throughputs - - [2 / (5e-9 - 1e-9)] = [5e8] - """ - pd = ProfileDataParser(filename=Path("ranking_profile_export.json")) - - # experiment 1 statistics - stats = pd.get_statistics(infer_mode="concurrency", load_level="10") - metrics = stats.metrics - stats_dict = stats.stats_dict - assert isinstance(metrics, Metrics) - - assert metrics.request_latencies == [2, 3] - assert metrics.request_throughputs == [pytest.approx(5e8)] - - assert stats_dict["request_latency"]["avg"] == pytest.approx(2.5) # type: ignore - assert stats_dict["request_latency"]["p50"] == pytest.approx(2.5) # type: ignore - assert stats_dict["request_latency"]["min"] == pytest.approx(2) # type: ignore - assert stats_dict["request_latency"]["max"] == pytest.approx(3) # type: ignore - assert stats_dict["request_latency"]["std"] == np.std([2, 3]) # type: ignore - - assert stats_dict["request_throughput"]["avg"] == pytest.approx(5e8) # type: ignore - - # ================================================ - # HUGGINGFACE RANKINGS API - # ================================================ - huggingface_ranking_profile_data = { - "service_kind": "openai", - "endpoint": "rerank", - "experiments": [ - { - "experiment": { - "mode": "concurrency", - "value": 10, - }, - "requests": [ - { - "timestamp": 1, - "request_inputs": { - "payload": '{"query":"What was the first car ever driven?","texts":["Daddys Home 2 Principal photography on the film began in Massachusetts in March 2017 and it was released in the United States by Paramount Pictures on November 10, 2017. Although the film received unfavorable reviews, it has grossed over $180 million worldwide on a $69 million budget.","Kevin Loader is a British film and television producer."]}' - }, - "response_timestamps": [3], - "response_outputs": [ - { - "response": '[{"index":0,"score":0.0032476764},{"index":1,"score":0.00036117696}]' - }, - ], - }, - { - "timestamp": 2, - "request_inputs": { - "payload": '{"query":"In what state did they film Shrek 2?","texts":["Francisco Antonio Zea Juan Francisco Antonio Hilari was a Colombian journalist, botanist, diplomat, politician, and statesman who served as the 1st Vice President of Colombia.","Daddys Home 2 Principal photography on the film began in Massachusetts in March 2017 and it was released in the United States by Paramount Pictures on November 10, 2017. Although the film received unfavorable reviews, it has grossed over $180 million worldwide on a $69 million budget."]}' - }, - "response_timestamps": [5], - "response_outputs": [ - { - "response": '[{"index":0,"score":0.020177318},{"index":1,"score":0.01461567}]' - }, - ], - }, - ], - }, - ], - } - - def test_huggingface_ranking_profile_data( - self, mock_read_write: pytest.MonkeyPatch - ) -> None: - """Collect base metrics from HuggingFace ranking profile export data and check values. 
- - Metrics - * request latencies - - [3 - 1, 5 - 2] = [2, 3] - * request throughputs - - [2 / (5e-9 - 1e-9)] = [5e8] - """ - pd = ProfileDataParser(filename=Path("huggingface_ranking_profile_export.json")) - - # experiment 1 statistics - stats = pd.get_statistics(infer_mode="concurrency", load_level="10") - metrics = stats.metrics - stats_dict = stats.stats_dict - assert isinstance(metrics, Metrics) - - assert metrics.request_latencies == [2, 3] - assert metrics.request_throughputs == [pytest.approx(5e8)] - - assert stats_dict["request_latency"]["avg"] == pytest.approx(2.5) # type: ignore - assert stats_dict["request_latency"]["p50"] == pytest.approx(2.5) # type: ignore - assert stats_dict["request_latency"]["min"] == pytest.approx(2) # type: ignore - assert stats_dict["request_latency"]["max"] == pytest.approx(3) # type: ignore - assert stats_dict["request_latency"]["std"] == np.std([2, 3]) # type: ignore - - assert stats_dict["request_throughput"]["avg"] == pytest.approx(5e8) # type: ignore diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_synthetic_image_generator.py b/src/c++/perf_analyzer/genai-perf/tests/test_synthetic_image_generator.py deleted file mode 100644 index 5a79794bb..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_synthetic_image_generator.py +++ /dev/null @@ -1,123 +0,0 @@ -import base64 -import random -from io import BytesIO - -import pytest -from genai_perf.llm_inputs.synthetic_image_generator import ( - ImageFormat, - SyntheticImageGenerator, -) -from PIL import Image - - -def decode_image(base64_string): - _, data = base64_string.split(",") - decoded_data = base64.b64decode(data) - return Image.open(BytesIO(decoded_data)) - - -@pytest.mark.parametrize( - "expected_image_size", - [ - (100, 100), - (200, 200), - ], -) -def test_different_image_size(expected_image_size): - expected_width, expected_height = expected_image_size - base64_string = SyntheticImageGenerator.create_synthetic_image( - image_width_mean=expected_width, - image_width_stddev=0, - image_height_mean=expected_height, - image_height_stddev=0, - image_format=ImageFormat.PNG, - ) - - image = decode_image(base64_string) - assert image.size == expected_image_size, "image not resized to the target size" - - -def test_negative_size_is_not_selected(): - # exception is raised, when PIL.Image.resize is called with negative values - _ = SyntheticImageGenerator.create_synthetic_image( - image_width_mean=-1, - image_width_stddev=10, - image_height_mean=-1, - image_height_stddev=10, - image_format=ImageFormat.PNG, - ) - - -@pytest.mark.parametrize( - "width_mean, width_stddev, height_mean, height_stddev", - [ - (100, 15, 100, 15), - (123, 10, 456, 7), - ], -) -def test_generator_deterministic(width_mean, width_stddev, height_mean, height_stddev): - random.seed(123) - img1 = SyntheticImageGenerator.create_synthetic_image( - image_width_mean=width_mean, - image_width_stddev=width_stddev, - image_height_mean=height_mean, - image_height_stddev=height_stddev, - image_format=ImageFormat.PNG, - ) - - random.seed(123) - img2 = SyntheticImageGenerator.create_synthetic_image( - image_width_mean=width_mean, - image_width_stddev=width_stddev, - image_height_mean=height_mean, - image_height_stddev=height_stddev, - image_format=ImageFormat.PNG, - ) - - assert img1 == img2, "generator is nondererministic" - - -@pytest.mark.parametrize("image_format", [ImageFormat.PNG, ImageFormat.JPEG]) -def test_base64_encoding_with_different_formats(image_format): - img_base64 = SyntheticImageGenerator.create_synthetic_image( 
- image_width_mean=100, - image_width_stddev=100, - image_height_mean=100, - image_height_stddev=100, - image_format=image_format, - ) - - # check prefix - expected_prefix = f"data:image/{image_format.name.lower()};base64," - assert img_base64.startswith(expected_prefix), "unexpected prefix" - - # check image format - data = img_base64[len(expected_prefix) :] - img_data = base64.b64decode(data) - img_bytes = BytesIO(img_data) - image = Image.open(img_bytes) - assert image.format == image_format.name - - -def test_random_image_format(): - random.seed(123) - img1 = SyntheticImageGenerator.create_synthetic_image( - image_width_mean=100, - image_width_stddev=100, - image_height_mean=100, - image_height_stddev=100, - image_format=None, - ) - - random.seed(456) - img2 = SyntheticImageGenerator.create_synthetic_image( - image_width_mean=100, - image_width_stddev=100, - image_height_mean=100, - image_height_stddev=100, - image_format=None, - ) - - # check prefix - assert img1.startswith("data:image/png") - assert img2.startswith("data:image/jpeg") diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_tokenizer.py b/src/c++/perf_analyzer/genai-perf/tests/test_tokenizer.py deleted file mode 100644 index 259389dcf..000000000 --- a/src/c++/perf_analyzer/genai-perf/tests/test_tokenizer.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
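The tokenizer tests that follow exercise genai_perf.tokenizer.get_tokenizer, a thin wrapper around a Hugging Face tokenizer that leaves special tokens out of encodes and decodes unless they are explicitly requested. A rough usage sketch under the same assumptions the deleted tests make (the default tokenizer must be downloadable in the environment; this is not the wrapper's implementation):

from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer

# Assumes network access to fetch the default Hugging Face tokenizer.
tokenizer = get_tokenizer(DEFAULT_TOKENIZER)
text = "This is test."

# Special tokens are omitted by default ...
plain_ids = tokenizer.encode(text)
# ... and only added when asked for, so this encoding is strictly longer.
ids_with_specials = tokenizer.encode(text, add_special_tokens=True)
assert len(ids_with_specials) > len(plain_ids)

# Decoding keeps the special tokens only when skip_special_tokens=False.
text_with_specials = tokenizer.decode(ids_with_specials, skip_special_tokens=False)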
-
-import pytest
-from genai_perf.exceptions import GenAIPerfException
-from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer
-
-
-class TestTokenizer:
-    def test_default_tokenizer(self):
-        tokenizer_model = DEFAULT_TOKENIZER
-        get_tokenizer(tokenizer_model)
-
-    def test_non_default_tokenizer(self):
-        tokenizer_model = "gpt2"
-        get_tokenizer(tokenizer_model)
-
-    def test_bad_tokenizer(self):
-        with pytest.raises(GenAIPerfException):
-            get_tokenizer("bad_tokenizer")
-
-    def test_default_args(self):
-        tokenizer_model = DEFAULT_TOKENIZER
-        tokenizer = get_tokenizer(tokenizer_model)
-
-        # There are 3 special tokens in the default tokenizer
-        # - <unk>: 0 (unknown)
-        # - <s>: 1 (beginning of sentence)
-        # - </s>: 2 (end of sentence)
-        special_tokens = list(tokenizer._tokenizer.added_tokens_encoder.keys())
-        special_token_ids = list(tokenizer._tokenizer.added_tokens_encoder.values())
-
-        # special tokens are disabled by default
-        text = "This is test."
-        tokens = tokenizer(text)["input_ids"]
-        assert all([s not in tokens for s in special_token_ids])
-
-        tokens = tokenizer.encode(text)
-        assert all([s not in tokens for s in special_token_ids])
-
-        output = tokenizer.decode(tokens)
-        assert all([s not in output for s in special_tokens])
-
-        # check special tokens is enabled
-        text = "This is test."
-        tokens = tokenizer(text, add_special_tokens=True)["input_ids"]
-        assert any([s in tokens for s in special_token_ids])
-
-        tokens = tokenizer.encode(text, add_special_tokens=True)
-        assert any([s in tokens for s in special_token_ids])
-
-        output = tokenizer.decode(tokens, skip_special_tokens=False)
-        assert any([s in output for s in special_tokens])
diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py b/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py
deleted file mode 100644
index fd4c34b51..000000000
--- a/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- -import subprocess -from unittest.mock import MagicMock, patch - -import pytest -from genai_perf import parser -from genai_perf.constants import DEFAULT_GRPC_URL -from genai_perf.wrapper import Profiler - - -class TestWrapper: - @pytest.mark.parametrize( - "arg", - [ - ([]), - (["-u", "testurl:1000"]), - (["--url", "testurl:1000"]), - ], - ) - def test_url_exactly_once_triton(self, monkeypatch, arg): - args = [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "triton", - ] + arg - monkeypatch.setattr("sys.argv", args) - args, extra_args = parser.parse_args() - cmd = Profiler.build_cmd(args, extra_args) - cmd_string = " ".join(cmd) - - number_of_url_args = cmd_string.count(" -u ") + cmd_string.count(" --url ") - assert number_of_url_args == 1 - - @pytest.mark.parametrize( - "arg, expected_filepath", - [ - ( - [], - "artifacts/test_model-triton-tensorrtllm-concurrency1/profile_export.json", - ), - ( - ["--artifact-dir", "test_dir"], - "test_dir/profile_export.json", - ), - ( - ["--artifact-dir", "test_dir", "--profile-export-file", "test.json"], - "test_dir/test.json", - ), - ], - ) - def test_profile_export_filepath(self, monkeypatch, arg, expected_filepath): - args = [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "triton", - ] + arg - monkeypatch.setattr("sys.argv", args) - args, extra_args = parser.parse_args() - cmd = Profiler.build_cmd(args, extra_args) - cmd_string = " ".join(cmd) - - expected_pattern = f"--profile-export-file {expected_filepath}" - assert expected_pattern in cmd_string - - @pytest.mark.parametrize( - "arg", - [ - (["--backend", "tensorrtllm"]), - (["--backend", "vllm"]), - ], - ) - def test_service_triton(self, monkeypatch, arg): - args = [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "triton", - ] + arg - monkeypatch.setattr("sys.argv", args) - args, extra_args = parser.parse_args() - cmd = Profiler.build_cmd(args, extra_args) - cmd_string = " ".join(cmd) - - # Ensure the correct arguments are appended. - assert cmd_string.count(" -i grpc") == 1 - assert cmd_string.count(" --streaming") == 1 - assert cmd_string.count(f"-u {DEFAULT_GRPC_URL}") == 1 - if arg[1] == "tensorrtllm": - assert cmd_string.count("--shape max_tokens:1") == 1 - assert cmd_string.count("--shape text_input:1") == 1 - - @pytest.mark.parametrize( - "arg", - [ - (["--endpoint-type", "completions"]), - (["--endpoint-type", "chat"]), - ], - ) - def test_service_openai(self, monkeypatch, arg): - args = [ - "genai-perf", - "profile", - "-m", - "test_model", - "--service-kind", - "openai", - ] + arg - monkeypatch.setattr("sys.argv", args) - args, extra_args = parser.parse_args() - cmd = Profiler.build_cmd(args, extra_args) - cmd_string = " ".join(cmd) - - # Ensure the correct arguments are appended. - assert cmd_string.count(" -i http") == 1 - - @patch("genai_perf.wrapper.subprocess.run") - def test_stdout_verbose(self, mock_subprocess_run): - args = MagicMock() - args.model = "test_model" - args.verbose = True - Profiler.run(args=args, extra_args=None) - - # Check that standard output was not redirected. - for call_args in mock_subprocess_run.call_args_list: - _, kwargs = call_args - assert ( - "stdout" not in kwargs or kwargs["stdout"] is None - ), "With the verbose flag, stdout should not be redirected." 
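The two stdout tests on either side of this point pin down one behavior of Profiler.run: with the verbose flag the perf_analyzer subprocess keeps the console's stdout, and without it stdout is sent to /dev/null. A minimal sketch of that dispatch, as an illustration of the contract being tested rather than the actual wrapper code (run_perf_analyzer is a hypothetical name):

import subprocess

def run_perf_analyzer(cmd, verbose):
    # Verbose runs leave perf_analyzer's output on the console; quiet runs
    # silence it by redirecting stdout to /dev/null.
    stdout = None if verbose else subprocess.DEVNULL
    return subprocess.run(cmd, stdout=stdout)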
- - @patch("genai_perf.wrapper.subprocess.run") - def test_stdout_not_verbose(self, mock_subprocess_run): - args = MagicMock() - args.model = "test_model" - args.verbose = False - Profiler.run(args=args, extra_args=None) - - # Check that standard output was redirected. - for call_args in mock_subprocess_run.call_args_list: - _, kwargs = call_args - assert ( - kwargs["stdout"] is subprocess.DEVNULL - ), "When the verbose flag is not passed, stdout should be redirected to /dev/null." diff --git a/src/c++/perf_analyzer/ictx_id_tracker.h b/src/c++/perf_analyzer/ictx_id_tracker.h deleted file mode 100644 index 8d85067eb..000000000 --- a/src/c++/perf_analyzer/ictx_id_tracker.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - - -namespace triton { namespace perfanalyzer { - -/// Interface for object that tracks context IDs -/// -class ICtxIdTracker { - public: - // Reset the tracker using the provided input count - // - virtual void Reset(size_t count) = 0; - - // Restore the given ID into the tracker - // - virtual void Restore(size_t id) = 0; - - // Pick and return a Ctx ID - // - virtual size_t Get() = 0; - - // Returns true if there are Ctx IDs available to Get. - virtual bool IsAvailable() = 0; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/idle_timer.h b/src/c++/perf_analyzer/idle_timer.h deleted file mode 100644 index 419789ec9..000000000 --- a/src/c++/perf_analyzer/idle_timer.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once -#include -#include -#include - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestLoadManager; -#endif - - -/// Class to track idle periods of time -/// -class IdleTimer { - public: - void Start() - { - std::lock_guard lk(mtx_); - StartImpl(); - } - - void Stop() - { - std::lock_guard lk(mtx_); - StopImpl(); - } - - /// Reset the time counter, and restart the timer if it is active - /// - void Reset() - { - Restart(); - idle_ns_ = 0; - } - - /// Returns the number of nanoseconds this timer has counted as being idle - /// If the timer was already active, then it will first stop (and count the - /// pending time), and then start back up - /// - uint64_t GetIdleTime() - { - Restart(); - return idle_ns_; - } - - private: - std::mutex mtx_; - uint64_t idle_ns_{0}; - bool is_idle_{false}; - std::chrono::_V2::steady_clock::time_point start_time_; - - void Restart() - { - std::lock_guard lk(mtx_); - if (is_idle_) { - StopImpl(); - StartImpl(); - } - } - - void StartImpl() - { - if (is_idle_) { - throw std::runtime_error("Can't start a timer that is already active\n"); - } - - is_idle_ = true; - start_time_ = std::chrono::steady_clock::now(); - } - - void StopImpl() - { - if (!is_idle_) { - throw std::runtime_error("Can't stop a timer that isn't active\n"); - } - - is_idle_ = false; - auto end = std::chrono::steady_clock::now(); - auto duration = end - start_time_; - idle_ns_ += duration.count(); - } - - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestLoadManager; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/iinfer_data_manager.h b/src/c++/perf_analyzer/iinfer_data_manager.h deleted file mode 100644 index 33dd8ac8c..000000000 --- a/src/c++/perf_analyzer/iinfer_data_manager.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "data_loader.h" -#include "infer_data.h" -#include "model_parser.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -/// Interface for classes that manage infer data preparation for inference -/// -class IInferDataManager { - public: - /// Initialize this object. Must be called before any other functions - /// \return cb::Error object indicating success or failure. - virtual cb::Error Init() = 0; - - /// Populate the target InferData object with input and output objects - /// according to the model's shape - /// \param infer_data The target InferData object. - /// \return cb::Error object indicating success or failure. - virtual cb::Error InitInferData(InferData& infer_data) = 0; - - /// Updates the input and expected output data in the target infer_data for an - /// inference request - /// \param thread_id The ID of the calling thread - /// \param stream_index The data stream to use for next data - /// \param step_index The step index to use for next data - /// \param infer_data The target InferData object - /// \return cb::Error object indicating success or failure. - virtual cb::Error UpdateInferData( - size_t thread_id, int stream_index, int step_index, - InferData& infer_data) = 0; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc deleted file mode 100644 index aa868eba7..000000000 --- a/src/c++/perf_analyzer/infer_context.cc +++ /dev/null @@ -1,356 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "infer_context.h" - -namespace triton { namespace perfanalyzer { - -void -InferContext::Init() -{ - thread_stat_->status_ = infer_data_manager_->InitInferData(infer_data_); - if (!thread_stat_->status_.IsOk()) { - return; - } - - if (streaming_) { - // Decoupled models should not collect client side statistics - thread_stat_->status_ = infer_backend_->StartStream( - async_callback_func_, (!parser_->IsDecoupled())); - if (!thread_stat_->status_.IsOk()) { - return; - } - } -} - -void -InferContext::SendInferRequest(bool delayed) -{ - // Update the inputs if required - if (using_json_data_) { - UpdateJsonData(); - } - SendRequest(request_id_++, delayed); -} - -void -InferContext::SendSequenceInferRequest(uint32_t seq_stat_index, bool delayed) -{ - // Need lock to protect the order of dispatch across worker threads. - // This also helps in reporting the realistic latencies. 
- std::lock_guard guard( - sequence_manager_->GetMutex(seq_stat_index)); - if (!early_exit && execute_) { - sequence_manager_->SetInferSequenceOptions( - seq_stat_index, infer_data_.options_); - - // Update the inputs if required - if (using_json_data_) { - UpdateSeqJsonData(seq_stat_index); - } - - sequence_manager_->DecrementRemainingQueries(seq_stat_index); - - SendRequest( - request_id_++, delayed, - sequence_manager_->GetSequenceID(seq_stat_index)); - } -} - -void -InferContext::CompleteOngoingSequence(uint32_t seq_stat_index) -{ - std::lock_guard guard( - sequence_manager_->GetMutex(seq_stat_index)); - - if (sequence_manager_->GetRemainingQueries(seq_stat_index) != 0) { - sequence_manager_->SetRemainingQueries(seq_stat_index, 1); - sequence_manager_->SetInferSequenceOptions( - seq_stat_index, infer_data_.options_); - - if (using_json_data_) { - UpdateSeqJsonData(seq_stat_index); - } - sequence_manager_->DecrementRemainingQueries(seq_stat_index); - - bool is_delayed = false; - SendRequest( - request_id_++, is_delayed, - sequence_manager_->GetSequenceID(seq_stat_index)); - } -} - -void -InferContext::SendRequest( - const uint64_t request_id, const bool delayed, const uint64_t sequence_id) -{ - if (!thread_stat_->status_.IsOk()) { - return; - } - - thread_stat_->num_sent_requests_++; - - // Parse the request inputs to save in the profile export file - RequestRecord::RequestInput request_inputs{GetInputs()}; - - if (async_) { - uint64_t unique_request_id{(thread_id_ << 48) | ((request_id << 16) >> 16)}; - infer_data_.options_->request_id_ = std::to_string(unique_request_id); - { - std::lock_guard lock(thread_stat_->mu_); - auto it = async_req_map_ - .emplace(infer_data_.options_->request_id_, RequestRecord()) - .first; - it->second.request_inputs_ = {request_inputs}; - it->second.start_time_ = std::chrono::system_clock::now(); - it->second.sequence_end_ = infer_data_.options_->sequence_end_; - it->second.delayed_ = delayed; - it->second.sequence_id_ = sequence_id; - } - - thread_stat_->idle_timer.Start(); - if (streaming_) { - thread_stat_->status_ = infer_backend_->AsyncStreamInfer( - *(infer_data_.options_), infer_data_.valid_inputs_, - infer_data_.outputs_); - } else { - thread_stat_->status_ = infer_backend_->AsyncInfer( - async_callback_func_, *(infer_data_.options_), - infer_data_.valid_inputs_, infer_data_.outputs_); - } - thread_stat_->idle_timer.Stop(); - - total_ongoing_requests_++; - } else { - std::chrono::time_point start_time_sync, - end_time_sync; - thread_stat_->idle_timer.Start(); - start_time_sync = std::chrono::system_clock::now(); - cb::InferResult* results = nullptr; - thread_stat_->status_ = infer_backend_->Infer( - &results, *(infer_data_.options_), infer_data_.valid_inputs_, - infer_data_.outputs_); - thread_stat_->idle_timer.Stop(); - RequestRecord::ResponseOutput response_outputs{}; - if (results != nullptr) { - if (thread_stat_->status_.IsOk()) { - response_outputs = GetOutputs(*results); - thread_stat_->status_ = ValidateOutputs(results); - } - delete results; - } - if (!thread_stat_->status_.IsOk()) { - return; - } - end_time_sync = std::chrono::system_clock::now(); - std::vector> - end_time_syncs{end_time_sync}; - { - // Add the request record to thread request records vector with proper - // locking - std::lock_guard lock(thread_stat_->mu_); - auto total = end_time_sync - start_time_sync; - thread_stat_->request_records_.emplace_back(RequestRecord( - start_time_sync, std::move(end_time_syncs), {request_inputs}, - {response_outputs}, 
infer_data_.options_->sequence_end_, delayed, - sequence_id, false)); - thread_stat_->status_ = - infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_])); - if (!thread_stat_->status_.IsOk()) { - return; - } - } - } -} - -const RequestRecord::RequestInput -InferContext::GetInputs() -{ - RequestRecord::RequestInput input{}; - for (const auto& request_input : infer_data_.valid_inputs_) { - std::string data_type{request_input->Datatype()}; - const uint8_t* buf{nullptr}; - size_t byte_size{0}; - request_input->RawData(&buf, &byte_size); - - // The first 4 bytes of BYTES data is a 32-bit integer to indicate the size - // of the rest of the data (which we already know based on byte_size). It - // should be ignored here, as it isn't part of the actual request - if (data_type == "BYTES" && byte_size >= 4) { - buf += 4; - byte_size -= 4; - } - input.emplace(request_input->Name(), RecordData(buf, byte_size, data_type)); - } - return input; -} - -const RequestRecord::ResponseOutput -InferContext::GetOutputs(const cb::InferResult& infer_result) -{ - RequestRecord::ResponseOutput output{}; - for (const auto& requested_output : infer_data_.outputs_) { - std::string data_type{requested_output->Datatype()}; - const uint8_t* buf{nullptr}; - size_t byte_size{0}; - infer_result.RawData(requested_output->Name(), &buf, &byte_size); - - // The first 4 bytes of BYTES data is a 32-bit integer to indicate the size - // of the rest of the data (which we already know based on byte_size). It - // should be ignored here, as it isn't part of the actual response - if (data_type == "BYTES" && byte_size >= 4) { - buf += 4; - byte_size -= 4; - } - output.emplace( - requested_output->Name(), RecordData(buf, byte_size, data_type)); - } - return output; -} - -void -InferContext::UpdateJsonData() -{ - int step_id = (data_step_id_ * batch_size_) % data_loader_->GetTotalSteps(0); - data_step_id_ += GetNumActiveThreads(); - thread_stat_->status_ = - infer_data_manager_->UpdateInferData(thread_id_, 0, step_id, infer_data_); -} - -void -InferContext::UpdateSeqJsonData(size_t seq_stat_index) -{ - const size_t sequence_length{ - sequence_manager_->GetSequenceLength(seq_stat_index)}; - const size_t remaining_queries{ - sequence_manager_->GetRemainingQueries(seq_stat_index)}; - const uint64_t data_stream_id{ - sequence_manager_->GetDataStreamID(seq_stat_index)}; - const size_t total_steps{data_loader_->GetTotalSteps(data_stream_id)}; - int step_id = (sequence_length - remaining_queries) % total_steps; - thread_stat_->status_ = infer_data_manager_->UpdateInferData( - thread_id_, data_stream_id, step_id, infer_data_); -} - -cb::Error -InferContext::ValidateOutputs(const cb::InferResult* result_ptr) -{ - // Validate output if set - if (!infer_data_.expected_outputs_.empty()) { - for (size_t i = 0; i < infer_data_.expected_outputs_.size(); ++i) { - const uint8_t* buf = nullptr; - size_t byte_size = 0; - for (const auto& expected : infer_data_.expected_outputs_[i]) { - // Request output by validation output's name explicitly, rather than - // relying on the array indices being sorted equally in both arrays. 
- result_ptr->RawData(expected.name, &buf, &byte_size); - if (!expected.is_valid) { - return cb::Error( - "Expected output can't be invalid", pa::GENERIC_ERROR); - } - if (byte_size < expected.batch1_size) { - return cb::Error( - "Output size doesn't match expected size", pa::GENERIC_ERROR); - } else if (memcmp(buf, expected.data_ptr, expected.batch1_size) != 0) { - return cb::Error( - "Output doesn't match expected output", pa::GENERIC_ERROR); - } else { - buf += expected.batch1_size; - byte_size -= expected.batch1_size; - } - } - if (byte_size != 0) { - return cb::Error( - "Output size doesn't match expected size", pa::GENERIC_ERROR); - } - } - } - return cb::Error::Success; -} - -void -InferContext::AsyncCallbackFuncImpl(cb::InferResult* result) -{ - std::shared_ptr result_ptr(result); - bool is_final_response{true}; - if (thread_stat_->cb_status_.IsOk()) { - // Add the request record to thread request records vector with - // proper locking - std::lock_guard lock(thread_stat_->mu_); - thread_stat_->cb_status_ = result_ptr->RequestStatus(); - if (thread_stat_->cb_status_.IsOk()) { - std::string request_id; - thread_stat_->cb_status_ = result_ptr->Id(&request_id); - const auto& it = async_req_map_.find(request_id); - if (it != async_req_map_.end()) { - bool is_null_response{false}; - thread_stat_->cb_status_ = - result_ptr->IsNullResponse(&is_null_response); - if (thread_stat_->cb_status_.IsOk() == false) { - return; - } - it->second.response_timestamps_.push_back( - std::chrono::system_clock::now()); - it->second.response_outputs_.push_back(GetOutputs(*result)); - num_responses_++; - if (is_null_response == true) { - it->second.has_null_last_response_ = true; - } - thread_stat_->cb_status_ = - result_ptr->IsFinalResponse(&is_final_response); - if (thread_stat_->cb_status_.IsOk() == false) { - return; - } - if (is_final_response) { - has_received_final_response_ = is_final_response; - thread_stat_->request_records_.emplace_back( - it->second.start_time_, it->second.response_timestamps_, - it->second.request_inputs_, it->second.response_outputs_, - it->second.sequence_end_, it->second.delayed_, - it->second.sequence_id_, it->second.has_null_last_response_); - infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_])); - thread_stat_->cb_status_ = ValidateOutputs(result); - async_req_map_.erase(request_id); - } - } - } - } - - if (worker_callback_) { - worker_callback_(id_); - } - - if (is_final_response) { - total_ongoing_requests_--; - num_responses_ = 0; - - if (async_callback_finalize_func_ != nullptr) { - async_callback_finalize_func_(id_); - } - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_context.h b/src/c++/perf_analyzer/infer_context.h deleted file mode 100644 index 7bacb16d5..000000000 --- a/src/c++/perf_analyzer/infer_context.h +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "data_loader.h" -#include "idle_timer.h" -#include "iinfer_data_manager.h" -#include "infer_data.h" -#include "perf_utils.h" -#include "request_record.h" -#include "sequence_manager.h" - -namespace triton { namespace perfanalyzer { - -// Holds the running status of the thread. -struct ThreadStat { - ThreadStat() {} - - // The status of the worker thread - cb::Error status_; - // The status of the callback thread for async requests - cb::Error cb_status_; - // TODO REFACTOR TMA-1046 -- This should be in the InferContext class - // The statistics of the InferContext - std::vector contexts_stat_; - - // Tracks the amount of time this thread spent sleeping or waiting - IdleTimer idle_timer; - - // A vector of request records - std::vector request_records_; - // A lock to protect thread data - std::mutex mu_; - // The number of sent requests by this thread. - std::atomic num_sent_requests_{0}; -}; - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockInferContext; -#endif - -/// Sends inference requests to the server -class InferContext { - public: - InferContext( - const size_t thread_id, const uint32_t id, const bool async, - const bool streaming, const bool on_sequence_model, - const bool using_json_data, const int32_t batch_size, - std::shared_ptr thread_stat, - std::shared_ptr data_loader, - std::shared_ptr parser, - std::shared_ptr factory, const bool& execute, - const std::shared_ptr& infer_data_manager, - std::shared_ptr sequence_manager) - : thread_id_(thread_id), id_(id), async_(async), streaming_(streaming), - on_sequence_model_(on_sequence_model), - using_json_data_(using_json_data), batch_size_(batch_size), - thread_stat_(thread_stat), data_loader_(data_loader), parser_(parser), - factory_(factory), data_step_id_(thread_id), execute_(execute), - infer_data_manager_(infer_data_manager), - sequence_manager_(sequence_manager) - { - thread_stat_->status_ = factory_->CreateClientBackend(&infer_backend_); - infer_data_.options_.reset(new cb::InferOptions(parser_->ModelName())); - infer_data_.options_->model_version_ = parser_->ModelVersion(); - infer_data_.options_->model_signature_name_ = parser_->ModelSignatureName(); - - thread_stat_->contexts_stat_.emplace_back(); - } - - InferContext(InferContext&&) = delete; - InferContext(const InferContext&) = delete; - - // Initialize the context. 
Must be done before any inferences are sent - void Init(); - - // Send a single inference request to the server - void SendInferRequest(bool delayed = false); - - // Send a single sequence inference request to the server - void SendSequenceInferRequest(uint32_t seq_index, bool delayed = false); - - // Finish the active sequence at the given seq_stat_index - void CompleteOngoingSequence(uint32_t seq_stat_index); - - // Returns the total number of async requests that have been sent by this - // object and have not returned - uint GetNumOngoingRequests() { return total_ongoing_requests_; } - - // Returns the number of responses for the current request - uint64_t GetNumResponsesForCurrentRequest() { return num_responses_; } - - // Register a function that will get called after every async request returns - void RegisterAsyncCallbackFinalize(std::function callback) - { - async_callback_finalize_func_ = callback; - } - - void RegisterWorkerCallback(std::function worker_callback) - { - worker_callback_ = worker_callback; - } - - // TODO REFACTOR TMA-1043 this should be in memory class - void SetNumActiveThreads(size_t num_threads) - { - num_active_threads_ = num_threads; - } - - bool HasReceivedFinalResponse() { return has_received_final_response_; } - - protected: - /// A helper function to issue inference request to the server. - /// \param request_id The unique id to be associated with the request. - /// \param delayed Whether the request fell behind its scheduled time. - /// \param sequence_id Sequence ID of the request. Note that the default of - /// `0` means the request is not a sequence. - virtual void SendRequest( - const uint64_t request_id, const bool delayed, - const uint64_t sequence_id = 0); - - /// Update inputs based on custom json data - void UpdateJsonData(); - - /// Update inputs based on custom json data for the given sequence - void UpdateSeqJsonData(size_t seq_stat_index); - - cb::Error ValidateOutputs(const cb::InferResult* result_ptr); - - // Callback function for handling asynchronous requests - void AsyncCallbackFuncImpl(cb::InferResult* result); - - bool async_{false}; - bool streaming_{false}; - const bool on_sequence_model_{false}; - bool using_json_data_{false}; - const int32_t batch_size_{0}; - - std::shared_ptr thread_stat_; - std::shared_ptr data_loader_; - std::shared_ptr parser_; - std::shared_ptr factory_; - std::shared_ptr infer_data_manager_; - - uint64_t request_id_ = 0; - std::map async_req_map_; - std::atomic total_ongoing_requests_{0}; - size_t data_step_id_; - - // Function pointer to the async callback function implementation - std::function async_callback_func_ = std::bind( - &InferContext::AsyncCallbackFuncImpl, this, std::placeholders::_1); - - // Function pointer to registered async callbacks - std::function async_callback_finalize_func_ = nullptr; - - private: - const RequestRecord::RequestInput GetInputs(); - - const RequestRecord::ResponseOutput GetOutputs( - const cb::InferResult& infer_result); - - const uint32_t id_{0}; - const size_t thread_id_{0}; - - size_t GetNumActiveThreads() { return num_active_threads_; } - - size_t num_active_threads_{0}; - - // The backend to communicate with the server - std::unique_ptr infer_backend_; - InferData infer_data_; - - // FIXME: update build to use C++17 instead of C++14. This is a workaround - // since C++14 doesn't have std::optional, but C++17 does. 
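The FIXME above describes a common pre-C++17 workaround: a reference member must always bind to something, so a placeholder flag is kept alongside it, whereas std::optional would make the "no external flag" state explicit. A hedged sketch of both variants, with illustrative names rather than the members declared below:

    #include <functional>
    #include <optional>

    // Pre-C++17 style: the reference needs a target, so a placeholder bool
    // lives next to it and is used until a real external flag is supplied.
    struct WorkerA {
      const bool placeholder_{false};
      std::reference_wrapper<const bool> execute_{placeholder_};
    };

    // C++17 style: std::optional removes the placeholder entirely and makes
    // the "not yet bound" state visible at the type level.
    struct WorkerB {
      std::optional<std::reference_wrapper<const bool>> execute_;
    };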
- const bool execute_placeholder_{false}; - std::reference_wrapper execute_{execute_placeholder_}; - - std::shared_ptr sequence_manager_{nullptr}; - uint64_t num_responses_{0}; - std::function worker_callback_{nullptr}; - bool has_received_final_response_{false}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockInferContext; - - public: - InferContext() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data.h b/src/c++/perf_analyzer/infer_data.h deleted file mode 100644 index abc52bb82..000000000 --- a/src/c++/perf_analyzer/infer_data.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "client_backend/client_backend.h" -#include "tensor_data.h" - -namespace triton { namespace perfanalyzer { - -/// Holds all the data needed to send an inference request -struct InferData { - ~InferData() - { - for (const auto input : inputs_) { - delete input; - } - for (const auto output : outputs_) { - delete output; - } - } - - // The vector of pointers to InferInput objects for all possible inputs, - // potentially including optional inputs with no provided data. - std::vector inputs_; - // The vector of pointers to InferInput objects to be - // used for inference request. - std::vector valid_inputs_; - // The vector of pointers to InferRequestedOutput objects - // to be used with the inference request. - std::vector outputs_; - // If not empty, the expected output data in the same order as 'outputs_' - // The outer vector is per-output. The inner vector is for batching of each - // output - std::vector> expected_outputs_; - // The InferOptions object holding the details of the - // inference. 
- std::unique_ptr options_; -}; - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager.cc b/src/c++/perf_analyzer/infer_data_manager.cc deleted file mode 100644 index fe5e9fcd8..000000000 --- a/src/c++/perf_analyzer/infer_data_manager.cc +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
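The InferData struct above owns its input and output pointers outright, deleting them in its destructor, while valid_inputs_ is only a non-owning view of the inputs that actually carry data for the current step. A hedged sketch of that owning-container-plus-view split, using unique_ptr and a stand-in tensor type rather than the client-backend classes:

    #include <memory>
    #include <vector>

    struct Tensor { bool has_data{false}; };  // stand-in for an input tensor

    struct Request {
      // Owning storage for every declared input, optional or not.
      std::vector<std::unique_ptr<Tensor>> inputs;
      // Non-owning view of the inputs that have data for this step.
      std::vector<Tensor*> valid_inputs;

      void RefreshValid() {
        valid_inputs.clear();
        for (auto& t : inputs)
          if (t->has_data) valid_inputs.push_back(t.get());
      }
    };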
- -#include "infer_data_manager.h" - -#include - -namespace triton { namespace perfanalyzer { - -cb::Error -InferDataManager::Init() -{ - RETURN_IF_ERROR(CreateAndPopulateInputs()); - return cb::Error::Success; -} - -cb::Error -InferDataManager::CreateAndPopulateInputs() -{ - // All combinations of thread + input + stream + step - // - for (size_t thread_id = 0; thread_id < max_threads_; thread_id++) { - for (const auto& input : *(parser_->Inputs())) { - const std::string& name = input.first; - const ModelTensor& tensor = input.second; - for (int stream_id = 0; - stream_id < (int)data_loader_->GetDataStreamsCount(); stream_id++) { - for (int step_id = 0; - step_id < (int)data_loader_->GetTotalSteps(stream_id); - step_id += 1) { - RETURN_IF_ERROR(CreateAndPopulateInput( - thread_id, name, tensor, stream_id, step_id)); - } - } - } - } - return cb::Error::Success; -} - -cb::Error -InferDataManager::CreateAndPopulateInput( - const size_t thread_id, const std::string& name, const ModelTensor& tensor, - int stream_id, int step_id) -{ - std::vector input_datas; - size_t count = 0; - - RETURN_IF_ERROR(GetInputData(name, tensor, stream_id, step_id, input_datas)); - - if (tensor.is_shape_tensor_) { - RETURN_IF_ERROR( - ValidateShapeTensor(tensor, stream_id, step_id, input_datas)); - } - - std::vector shape; - RETURN_IF_ERROR( - data_loader_->GetInputShape(tensor, stream_id, step_id, &shape)); - if (!shape.empty()) { - if ((parser_->MaxBatchSize() != 0) && (!tensor.is_shape_tensor_)) { - shape.insert(shape.begin(), (int64_t)batch_size_); - } - } - - cb::InferInput* input; - RETURN_IF_ERROR( - CreateInferInput(&input, backend_kind_, name, shape, tensor.datatype_)); - - - // Number of missing pieces of data for optional inputs - int missing_data_cnt = 0; - int total_cnt = input_datas.size(); - - for (size_t i = 0; i < total_cnt; i++) { - if (!input_datas[i].is_valid) { - missing_data_cnt++; - } else { - RETURN_IF_ERROR(input->AppendRaw( - input_datas[i].data_ptr, input_datas[i].batch1_size)); - } - } - - // If all optional inputs had data provided, this is a valid input. But if - // some inferences in the batch provided data for an optional input and - // some inferences did not, this is an invalid case and an error is - // thrown. - if (missing_data_cnt == 0) { - inputs_.insert({{thread_id, name, stream_id, step_id}, input}); - } else if (missing_data_cnt > 0 && missing_data_cnt < total_cnt) { - return cb::Error( - "For batch sizes larger than 1, the same set of inputs must be " - "specified for each batch. 
You cannot use different set of " - "optional inputs for each individual batch."); - } - - return cb::Error::Success; -} - -cb::InferInput* -InferDataManager::GetInput( - const size_t thread_id, const std::string& name, int stream_id, int step_id) -{ - auto input = inputs_.find({thread_id, name, stream_id, step_id}); - if (input == inputs_.end()) { - return nullptr; - } else { - return input->second; - } -} - - -cb::Error -InferDataManager::InitInferDataInput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) -{ - std::vector shape; - RETURN_IF_ERROR(data_loader_->GetInputShape(model_tensor, 0, 0, &shape)); - if (shape.empty() && (backend_kind_ == cb::BackendKind::TRITON)) { - return cb::Error("unable to set shape for the input", pa::GENERIC_ERROR); - } - - if ((parser_->MaxBatchSize() != 0) && (!model_tensor.is_shape_tensor_)) { - shape.insert(shape.begin(), (int64_t)batch_size_); - } - - cb::InferInput* infer_input; - RETURN_IF_ERROR(CreateInferInput( - &infer_input, backend_kind_, name, shape, model_tensor.datatype_)); - infer_data.inputs_.push_back(infer_input); - - - TensorData input_data; - RETURN_IF_ERROR(data_loader_->GetInputData(model_tensor, 0, 0, input_data)); - - // Add optional input to request if data was found - if (input_data.is_valid) { - infer_data.valid_inputs_.push_back(infer_input); - } - - if (!shape.empty()) { - size_t max_count = (parser_->MaxBatchSize() == 0) ? 1 : batch_size_; - for (size_t i = 0; i < max_count; ++i) { - RETURN_IF_ERROR( - infer_input->AppendRaw(input_data.data_ptr, input_data.batch1_size)); - } - } - - AddInferDataParameters(infer_data); - - return cb::Error::Success; -} - -cb::Error -InferDataManager::InitInferDataOutput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) -{ - cb::InferRequestedOutput* requested_output; - RETURN_IF_ERROR(cb::InferRequestedOutput::Create( - &requested_output, backend_kind_, name, model_tensor.datatype_)); - infer_data.outputs_.push_back(requested_output); - - return cb::Error::Success; -} - -cb::Error -InferDataManager::UpdateInputs( - const size_t thread_id, const int stream_index, const int step_index, - InferData& infer_data) -{ - // Reset inputs for this inference request - infer_data.valid_inputs_.clear(); - - for (const auto& input : infer_data.inputs_) { - const auto& name = input->Name(); - - cb::InferInput* tmp_input = - GetInput(thread_id, name, stream_index, step_index); - if (tmp_input != nullptr) { - infer_data.valid_inputs_.push_back(tmp_input); - } - } - return cb::Error::Success; -} - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager.h b/src/c++/perf_analyzer/infer_data_manager.h deleted file mode 100644 index ccde8d2f8..000000000 --- a/src/c++/perf_analyzer/infer_data_manager.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "data_loader.h" -#include "infer_data.h" -#include "infer_data_manager_base.h" -#include "model_parser.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -/// Manages infer data to prepare an inference request and the resulting -/// inference output from triton server -class InferDataManager : public InferDataManagerBase { - public: - InferDataManager( - const size_t max_threads, const int32_t batch_size, - const std::unordered_map& - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - : max_threads_(max_threads), - InferDataManagerBase( - batch_size, request_parameters, parser, factory, data_loader) - { - } - - /// Initialize this object. Must be called before any other functions - /// \return cb::Error object indicating success or failure. - cb::Error Init() override; - - protected: - const size_t max_threads_{1}; - std::map, cb::InferInput*> inputs_; - - cb::Error CreateAndPopulateInputs(); - cb::Error CreateAndPopulateInput( - const size_t thread_id, const std::string& name, - const ModelTensor& model_tensor, int stream_id, int step_id); - - cb::InferInput* GetInput( - const size_t thread_id, const std::string& name, int stream_id, - int step_id); - - cb::Error InitInferDataInput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) override; - - cb::Error InitInferDataOutput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) override; - - /// Helper function to update the inputs - /// \param thread_id The ID of the calling thread - /// \param stream_index The data stream to use for next data - /// \param step_index The step index to use for next data - /// \param infer_data The target InferData object - /// \return cb::Error object indicating success or failure. - cb::Error UpdateInputs( - const size_t thread_id, const int stream_index, const int step_index, - InferData& infer_data); - -#ifndef DOCTEST_CONFIG_DISABLE - public: - InferDataManager() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager_base.cc b/src/c++/perf_analyzer/infer_data_manager_base.cc deleted file mode 100644 index 9a06f86b0..000000000 --- a/src/c++/perf_analyzer/infer_data_manager_base.cc +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "infer_data_manager_base.h" - -#include - -namespace triton { namespace perfanalyzer { - -cb::Error -InferDataManagerBase::GetInputData( - const std::string& name, const ModelTensor& tensor, int stream_id, - int step_id, std::vector& input_datas) -{ - size_t max_count = tensor.is_shape_tensor_ ? 
1 : batch_size_; - std::vector shape; - std::vector prev_shape; - - for (size_t count = 0; count < max_count; count++) { - int local_step_id = - (step_id + count) % data_loader_->GetTotalSteps(stream_id); - - TensorData input_data; - - RETURN_IF_ERROR( - data_loader_->GetInputShape(tensor, stream_id, local_step_id, &shape)); - if (!shape.empty()) { - if (count == 0) { - prev_shape = shape; - } else { - if (!std::equal(shape.begin(), shape.end(), prev_shape.begin())) { - return cb::Error( - "can not batch tensors with different shapes together " - "(input '" + - name + "' expected shape " + ShapeVecToString(prev_shape) + - " and received " + ShapeVecToString(shape), - pa::GENERIC_ERROR); - } - } - } - - RETURN_IF_ERROR(data_loader_->GetInputData( - tensor, stream_id, local_step_id, input_data)); - - input_datas.push_back(input_data); - } - - return cb::Error::Success; -} - -cb::Error -InferDataManagerBase::ValidateShapeTensor( - const ModelTensor& tensor, int stream_id, int step_id, - const std::vector& input_datas) -{ - // Validate that steps 1 through N are exactly the same as step 0, since step - // 0 is the only one we send for shape tensors - for (size_t count = 1; count < batch_size_; count++) { - int local_step_id = - (step_id + count) % data_loader_->GetTotalSteps(stream_id); - - TensorData input_data; - RETURN_IF_ERROR(data_loader_->GetInputData( - tensor, stream_id, local_step_id, input_data)); - - if (input_data.batch1_size != input_datas.back().batch1_size) { - return cb::Error( - "The shape tensors should be identical in a batch (mismatch " - "in size)", - pa::GENERIC_ERROR); - } - - for (size_t data_idx = 0; data_idx < input_data.batch1_size; data_idx++) { - if (*(input_data.data_ptr + data_idx) != - *(input_datas.back().data_ptr + data_idx)) { - return cb::Error( - "The shape tensors should be identical in a batch " - "(mismatch in content)", - pa::GENERIC_ERROR); - } - } - } - return cb::Error::Success; -} - -cb::Error -InferDataManagerBase::InitInferData(InferData& infer_data) -{ - // Initialize inputs - for (const auto& input : *(parser_->Inputs())) { - RETURN_IF_ERROR(InitInferDataInput(input.first, input.second, infer_data)); - } - - for (const auto& output : *(parser_->Outputs())) { - RETURN_IF_ERROR( - InitInferDataOutput(output.first, output.second, infer_data)); - } - - return cb::Error::Success; -} - -cb::Error -InferDataManagerBase::UpdateInferData( - size_t thread_id, int stream_index, int step_index, InferData& infer_data) -{ - RETURN_IF_ERROR(data_loader_->ValidateIndexes(stream_index, step_index)); - RETURN_IF_ERROR( - UpdateInputs(thread_id, stream_index, step_index, infer_data)); - RETURN_IF_ERROR( - UpdateValidationOutputs(stream_index, step_index, infer_data)); - return cb::Error::Success; -} - -cb::Error -InferDataManagerBase::UpdateValidationOutputs( - int stream_index, int step_index, InferData& infer_data) -{ - RETURN_IF_ERROR(data_loader_->ValidateIndexes(stream_index, step_index)); - - infer_data.expected_outputs_.clear(); - - for (const auto& output : infer_data.outputs_) { - const auto& model_output = (*(parser_->Outputs()))[output->Name()]; - - TensorData output_data; - const int* set_shape_values = nullptr; - int set_shape_value_cnt = 0; - - std::vector outputs; - for (size_t i = 0; i < batch_size_; ++i) { - RETURN_IF_ERROR(data_loader_->GetOutputData( - output->Name(), stream_index, - (step_index + i) % data_loader_->GetTotalSteps(0), output_data)); - if (!output_data.is_valid) { - break; - } - - outputs.emplace_back(output_data); - // Shape 
tensor only need the first batch element - if (model_output.is_shape_tensor_) { - break; - } - } - if (!outputs.empty()) { - infer_data.expected_outputs_.emplace_back(std::move(outputs)); - } - } - return cb::Error::Success; -} - -cb::Error -InferDataManagerBase::CreateInferInput( - cb::InferInput** infer_input, const cb::BackendKind kind, - const std::string& name, const std::vector& dims, - const std::string& datatype) -{ - return cb::InferInput::Create(infer_input, kind, name, dims, datatype); -} - -void -InferDataManagerBase::AddInferDataParameters(InferData& infer_data) -{ - infer_data.options_->request_parameters_ = request_parameters_; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager_base.h b/src/c++/perf_analyzer/infer_data_manager_base.h deleted file mode 100644 index d92499067..000000000 --- a/src/c++/perf_analyzer/infer_data_manager_base.h +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "data_loader.h" -#include "iinfer_data_manager.h" -#include "infer_data.h" -#include "model_parser.h" -#include "perf_utils.h" -#include "tensor_data.h" - -namespace triton { namespace perfanalyzer { - -/// Base class for Infer Data managers -/// -class InferDataManagerBase : public IInferDataManager { - public: - InferDataManagerBase( - const int32_t batch_size, - const std::unordered_map& - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - : batch_size_(batch_size), request_parameters_(request_parameters), - parser_(parser), factory_(factory), data_loader_(data_loader), - backend_kind_(factory->Kind()) - { - } - - /// Populate the target InferData object with input and output objects - /// according to the model's shape - /// \param infer_data The target InferData object. - /// \return cb::Error object indicating success or failure. 
- cb::Error InitInferData(InferData& infer_data) override; - - /// Updates the input data to use for inference request - /// \param thread_id The ID of the calling thread - /// \param stream_index The data stream to use for next data - /// \param step_index The step index to use for next data - /// \param infer_data The target InferData object - /// \return cb::Error object indicating success or failure. - cb::Error UpdateInferData( - size_t thread_id, int stream_index, int step_index, - InferData& infer_data) override; - - protected: - size_t batch_size_; - std::shared_ptr parser_; - std::shared_ptr factory_; - std::shared_ptr data_loader_; - std::unique_ptr backend_; - cb::BackendKind backend_kind_; - std::unordered_map request_parameters_; - - /// Gets the input data for the specified input for the specified batch size - /// - /// \param name The name of the input to get data for - /// \param tensor The ModelTensor of the input to get data for - /// \param stream_id The ID of the stream to get data for - /// \param step_id The ID of the step within the stream - /// \param input_datas The returned vector of TensorDatas - /// \return cb::Error object indicating success or failure. - cb::Error GetInputData( - const std::string& name, const ModelTensor& tensor, int stream_id, - int step_id, std::vector& input_datas); - - /// For the case of an input with is_shape_tensor true, validate that - /// it follows all rules, and throw an error if it does not - /// \param tensor The ModelTensor of the input to validate - /// \param stream_id The ID of the stream to validate - /// \param step_id The ID of the step within the stream - /// \param input_datas vector of TensorDatas to validate - /// \return cb::Error object indicating success or failure. - cb::Error ValidateShapeTensor( - const ModelTensor& tensor, int stream_id, int step_id, - const std::vector& input_datas); - - /// Helper function to update the inputs - /// \param thread_id The ID of the calling thread - /// \param stream_index The data stream to use for next data - /// \param step_index The step index to use for next data - /// \param infer_data The target InferData object - /// \return cb::Error object indicating success or failure. - virtual cb::Error UpdateInputs( - const size_t thread_id, const int stream_index, const int step_index, - InferData& infer_data) = 0; - - /// Updates the expected output data to use for inference request. Empty - /// vector will be returned if there is no expected output associated to the - /// step. - /// \param stream_index The data stream to use for next data - /// \param step_index The step index to use for next data - /// \param infer_data The target InferData object - /// \return cb::Error object indicating success or failure. - cb::Error UpdateValidationOutputs( - int stream_index, int step_index, InferData& infer_data); - - /// Creates inference input object - /// \param infer_input Output parameter storing newly created inference input - /// \param kind Backend kind - /// \param name Name of inference input - /// \param dims Shape of inference input - /// \param datatype Data type of inference input - /// \return cb::Error object indicating success or failure. 
- virtual cb::Error CreateInferInput( - cb::InferInput** infer_input, const cb::BackendKind kind, - const std::string& name, const std::vector& dims, - const std::string& datatype); - - virtual cb::Error InitInferDataInput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) = 0; - - virtual cb::Error InitInferDataOutput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) = 0; - - void AddInferDataParameters(InferData& infer_data); - -#ifndef DOCTEST_CONFIG_DISABLE - public: - InferDataManagerBase() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager_factory.h b/src/c++/perf_analyzer/infer_data_manager_factory.h deleted file mode 100644 index 6bf24bef8..000000000 --- a/src/c++/perf_analyzer/infer_data_manager_factory.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
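InferDataManagerBase above follows a template-method shape: the shared InitInferData and UpdateInferData drivers walk the model's inputs and outputs and defer tensor creation and refresh to pure-virtual hooks, which is what lets the plain and shared-memory managers differ only in how data is materialized. A minimal hedged sketch of that structure with generic names, not the perf_analyzer classes:

    #include <iostream>
    #include <string>
    #include <vector>

    class DataManagerBase {
     public:
      // Shared driver: iterate the declared tensors and defer creation
      // of each one to the concrete subclass.
      void Init(const std::vector<std::string>& inputs) {
        for (const auto& name : inputs) CreateInput(name);
      }
      virtual ~DataManagerBase() = default;

     protected:
      virtual void CreateInput(const std::string& name) = 0;
    };

    class InProcessDataManager : public DataManagerBase {
     protected:
      void CreateInput(const std::string& name) override {
        std::cout << "allocating " << name << " in process memory\n";
      }
    };

    class SharedMemoryDataManager : public DataManagerBase {
     protected:
      void CreateInput(const std::string& name) override {
        std::cout << "registering " << name << " as a shared memory region\n";
      }
    };

    // Usage: InProcessDataManager m; m.Init({"INPUT0", "INPUT1"});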
-#pragma once - -#include "data_loader.h" -#include "iinfer_data_manager.h" -#include "infer_data_manager.h" -#include "infer_data_manager_shm.h" -#include "model_parser.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -class InferDataManagerFactory { - public: - static std::shared_ptr CreateInferDataManager( - const size_t max_threads, const int32_t batch_size, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const std::unordered_map& - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - { - if (shared_memory_type == SharedMemoryType::NO_SHARED_MEMORY) { - return CreateInferDataManagerNoShm( - max_threads, batch_size, request_parameters, parser, factory, - data_loader); - } else { - return CreateInferDataManagerShm( - batch_size, shared_memory_type, output_shm_size, request_parameters, - parser, factory, data_loader); - } - } - - private: - static std::shared_ptr CreateInferDataManagerNoShm( - const size_t max_threads, const int32_t batch_size, - const std::unordered_map& - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - { - return std::make_shared( - max_threads, batch_size, request_parameters, parser, factory, - data_loader); - } - - static std::shared_ptr CreateInferDataManagerShm( - const int32_t batch_size, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, - const std::unordered_map& - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - { - return std::make_shared( - batch_size, shared_memory_type, output_shm_size, request_parameters, - parser, factory, data_loader); - } -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager_shm.cc b/src/c++/perf_analyzer/infer_data_manager_shm.cc deleted file mode 100644 index 8df7041eb..000000000 --- a/src/c++/perf_analyzer/infer_data_manager_shm.cc +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "infer_data_manager_shm.h" - -#include - -namespace triton { namespace perfanalyzer { - -InferDataManagerShm::~InferDataManagerShm() -{ - cb::Error err; - if (backend_.get() != nullptr) { - err = backend_->UnregisterAllSharedMemory(); - if (!err.IsOk()) { - std::cerr << "Unable to unregister all shared memory regions" - << std::endl; - } - if (shared_memory_type_ == SharedMemoryType::SYSTEM_SHARED_MEMORY) { - for (auto& region : shared_memory_regions_) { - if (factory_->Kind() != - triton::perfanalyzer::clientbackend::BackendKind::TRITON_C_API) { - err = backend_->UnmapSharedMemory( - shared_memory_regions_[region.first].data_.get(), - shared_memory_regions_[region.first].byte_size_); - if (!err.IsOk()) { - std::cerr << "Unable to unmap shared memory with key (" - << region.first << "): Starting: " - << static_cast( - shared_memory_regions_[region.first].data_.get()) - << ", size: " - << shared_memory_regions_[region.first].byte_size_ - << std::endl; - } - err = backend_->UnlinkSharedMemoryRegion(region.first); - if (!err.IsOk()) { - std::cerr << "Unable to unlink shared memory with key: " - << region.first << std::endl; - } - } - } - } - } -} - - -cb::Error -InferDataManagerShm::Init() -{ - // TMA-1062 remove the factory from this class and use only the backend - RETURN_IF_ERROR(factory_->CreateClientBackend(&backend_)); - // Calling this function for the clean start - backend_->UnregisterAllSharedMemory(); - - RETURN_IF_ERROR(CreateOutputMemoryRegions()); - RETURN_IF_ERROR(CreateAndPopulateInputMemoryRegions()); - - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::CreateOutputMemoryRegions() -{ - // Allocate the shared memory for outputs - for (const auto& output : *(parser_->Outputs())) { - const std::string& name = output.first; - const ModelTensor& tensor = output.second; - int64_t batch1_bytesize = ByteSize(tensor.shape_, tensor.datatype_); - if (batch1_bytesize < 0) { - batch1_bytesize = output_shm_size_; - } - uint8_t* output_shm_ptr; - size_t alloc_size = batch1_bytesize * batch_size_; - std::string region_name(TensorToRegionName(name)); - RETURN_IF_ERROR(CreateMemoryRegion( - region_name, shared_memory_type_, alloc_size, - reinterpret_cast(&output_shm_ptr))); - } - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::CreateAndPopulateInputMemoryRegions() -{ - // All combinations of input + stream + step - // - for (const auto& input : *(parser_->Inputs())) { - const std::string& name = input.first; - const ModelTensor& tensor = input.second; - for (int stream_id = 0; - stream_id < (int)data_loader_->GetDataStreamsCount(); stream_id++) { - for (int step_id = 0; - step_id < (int)data_loader_->GetTotalSteps(stream_id); - step_id += 1) { - RETURN_IF_ERROR(CreateAndPopulateInputMemoryRegion( - name, tensor, stream_id, step_id)); - } - } - } - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::CreateAndPopulateInputMemoryRegion( - const std::string& name, const ModelTensor& tensor, int stream_id, - 
int step_id) -{ - std::vector input_datas; - size_t count = 0; - - RETURN_IF_ERROR(GetInputData(name, tensor, stream_id, step_id, input_datas)); - - if (tensor.is_shape_tensor_) { - RETURN_IF_ERROR( - ValidateShapeTensor(tensor, stream_id, step_id, input_datas)); - } - - size_t alloc_size = 0; - for (size_t i = 0; i < input_datas.size(); i++) { - if (!input_datas[i].is_valid) { - return cb::Error( - "Shared memory support in Perf Analyzer does not support " - "optional inputs at this time"); - } - alloc_size += input_datas[i].batch1_size; - } - - // Generate the shared memory region name - std::string region_name( - TensorToRegionName(name) + "_" + std::to_string(stream_id) + "_" + - std::to_string(step_id)); - uint8_t* input_shm_ptr; - RETURN_IF_ERROR(CreateMemoryRegion( - region_name, shared_memory_type_, alloc_size, - reinterpret_cast(&input_shm_ptr))); - RETURN_IF_ERROR(CopySharedMemory( - input_shm_ptr, input_datas, tensor.is_shape_tensor_, region_name)); - - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::CreateMemoryRegion( - const std::string& shm_region_name, const SharedMemoryType& memory_type, - const size_t byte_size, void** ptr) -{ - if (memory_type == SharedMemoryType::SYSTEM_SHARED_MEMORY) { - if (factory_->Kind() == - triton::perfanalyzer::clientbackend::BackendKind::TRITON_C_API) { - *ptr = new uint8_t[byte_size]; - RETURN_IF_ERROR( - backend_->RegisterSystemMemory(shm_region_name, *ptr, byte_size)); - - // Set free as the destructor. - shared_memory_regions_.emplace( - std::piecewise_construct, std::forward_as_tuple(shm_region_name), - std::forward_as_tuple(SharedMemoryData( - byte_size, - std::unique_ptr>( - reinterpret_cast(*ptr), - [](uint8_t* memory) { free(memory); })))); - } else { - std::string shm_key("/" + shm_region_name); - int shm_fd_op; - RETURN_IF_ERROR( - backend_->CreateSharedMemoryRegion(shm_key, byte_size, &shm_fd_op)); - RETURN_IF_ERROR(backend_->MapSharedMemory(shm_fd_op, 0, byte_size, ptr)); - - RETURN_IF_ERROR(backend_->RegisterSystemSharedMemory( - shm_region_name, shm_key, byte_size)); - - // No-op destruction - shared_memory_regions_.emplace( - std::piecewise_construct, std::forward_as_tuple(shm_region_name), - std::forward_as_tuple(SharedMemoryData( - byte_size, - std::unique_ptr>( - reinterpret_cast(*ptr), [](uint8_t* memory) {})))); - } - } else if (memory_type == SharedMemoryType::CUDA_SHARED_MEMORY) { -#ifdef TRITON_ENABLE_GPU - cudaError_t cuda_err = cudaMalloc((void**)ptr, byte_size); - if (cuda_err != cudaSuccess) { - return cb::Error( - "unable to allocate memory of " + std::to_string(byte_size) + - " bytes on gpu for output: " + - std::string(cudaGetErrorString(cuda_err)), - pa::GENERIC_ERROR); - } - - if (factory_->Kind() == - triton::perfanalyzer::clientbackend::BackendKind::TRITON_C_API) { - RETURN_IF_ERROR( - backend_->RegisterCudaMemory(shm_region_name, *ptr, byte_size)); - - // Set cudaFree as the destructor - shared_memory_regions_.emplace( - std::piecewise_construct, std::forward_as_tuple(shm_region_name), - std::forward_as_tuple(SharedMemoryData( - byte_size, - std::unique_ptr>( - reinterpret_cast(*ptr), - [shm_region_name, byte_size](uint8_t* memory) { - cudaError_t cuda_err = cudaFree(memory); - if (cuda_err != cudaSuccess) { - std::cerr - << "Unable to free cuda shared memory for " - << shm_region_name - << ": Starting: " << static_cast(memory) - << ", size: " << byte_size - << " bytes, Details: " << cudaGetErrorString(cuda_err) - << std::endl; - } - })))); - } else { - cudaIpcMemHandle_t cuda_handle; - 
RETURN_IF_ERROR( - CreateCUDAIPCHandle(&cuda_handle, reinterpret_cast(*ptr))); - RETURN_IF_ERROR(backend_->RegisterCudaSharedMemory( - shm_region_name, cuda_handle, byte_size)); - - // No operation required for deleting the memory - shared_memory_regions_.emplace( - std::piecewise_construct, std::forward_as_tuple(shm_region_name), - std::forward_as_tuple(SharedMemoryData( - byte_size, - std::unique_ptr>( - reinterpret_cast(*ptr), [](uint8_t* memory) {})))); - } -#endif // TRITON_ENABLE_GPU - } else { - return cb::Error( - "CreateMemoryRegion called with invalid memory region type.", - pa::GENERIC_ERROR); - } - - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::CopySharedMemory( - uint8_t* input_shm_ptr, const std::vector& tensor_datas, - bool is_shape_tensor, std::string& region_name) -{ - if (shared_memory_type_ == SharedMemoryType::SYSTEM_SHARED_MEMORY) { - // Populate the region with data - size_t count = 0; - size_t offset = 0; - size_t max_count = is_shape_tensor ? 1 : batch_size_; - while (count < max_count) { - memcpy( - input_shm_ptr + offset, tensor_datas[count].data_ptr, - tensor_datas[count].batch1_size); - offset += tensor_datas[count].batch1_size; - count++; - } - } else { -#ifdef TRITON_ENABLE_GPU - // Populate the region with data - size_t count = 0; - size_t offset = 0; - size_t max_count = is_shape_tensor ? 1 : batch_size_; - while (count < max_count) { - cudaError_t cuda_err = cudaMemcpy( - (void*)(input_shm_ptr + offset), (void*)tensor_datas[count].data_ptr, - tensor_datas[count].batch1_size, cudaMemcpyHostToDevice); - if (cuda_err != cudaSuccess) { - return cb::Error( - "Failed to copy data to cuda shared memory for " + region_name + - " : " + std::string(cudaGetErrorString(cuda_err)), - pa::GENERIC_ERROR); - } - offset += tensor_datas[count].batch1_size; - count++; - } -#endif // TRITON_ENABLE_GPU - } - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::InitInferDataInput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) -{ - std::vector shape; - RETURN_IF_ERROR(data_loader_->GetInputShape(model_tensor, 0, 0, &shape)); - if (!shape.empty()) { - if ((parser_->MaxBatchSize() != 0) && (!model_tensor.is_shape_tensor_)) { - shape.insert(shape.begin(), (int64_t)batch_size_); - } - } else { - return cb::Error("unable to set shape for the input", pa::GENERIC_ERROR); - } - - cb::InferInput* infer_input; - RETURN_IF_ERROR(CreateInferInput( - &infer_input, backend_kind_, name, shape, model_tensor.datatype_)); - infer_data.inputs_.push_back(infer_input); - - // FIXME: TMA-765 - Shared memory mode does not support optional inputs, - // currently, and will be implemented in the associated story. 
- infer_data.valid_inputs_.push_back(infer_input); - - std::string region_name( - TensorToRegionName(name) + "_" + std::to_string(0) + "_" + - std::to_string(0)); - RETURN_IF_ERROR(infer_input->SetSharedMemory( - region_name, shared_memory_regions_[region_name].byte_size_)); - - AddInferDataParameters(infer_data); - - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::InitInferDataOutput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) -{ - cb::InferRequestedOutput* requested_output; - RETURN_IF_ERROR(cb::InferRequestedOutput::Create( - &requested_output, backend_kind_, name, model_tensor.datatype_)); - infer_data.outputs_.push_back(requested_output); - - std::string region_name(TensorToRegionName(name)); - RETURN_IF_ERROR(requested_output->SetSharedMemory( - region_name, shared_memory_regions_[region_name].byte_size_)); - - return cb::Error::Success; -} - -cb::Error -InferDataManagerShm::UpdateInputs( - const size_t thread_id, const int stream_index, const int step_index, - InferData& infer_data) -{ - for (const auto& input : infer_data.inputs_) { - RETURN_IF_ERROR(input->Reset()); - const auto& model_input = (*(parser_->Inputs()))[input->Name()]; - - std::string region_name( - TensorToRegionName(input->Name()) + '_' + std::to_string(stream_index) + - "_" + std::to_string(step_index)); - - std::vector shape; - RETURN_IF_ERROR(data_loader_->GetInputShape( - model_input, stream_index, step_index, &shape)); - if (!shape.empty()) { - if ((parser_->MaxBatchSize() != 0) && (!model_input.is_shape_tensor_)) { - shape.insert(shape.begin(), (int64_t)batch_size_); - } - input->SetShape(shape); - } - RETURN_IF_ERROR(input->SetSharedMemory( - region_name, shared_memory_regions_[region_name].byte_size_)); - } - return cb::Error::Success; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager_shm.h b/src/c++/perf_analyzer/infer_data_manager_shm.h deleted file mode 100644 index 6a5ac9db6..000000000 --- a/src/c++/perf_analyzer/infer_data_manager_shm.h +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "data_loader.h" -#include "infer_data.h" -#include "infer_data_manager_base.h" -#include "model_parser.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -namespace { - -#ifdef TRITON_ENABLE_GPU - -#include - -#define RETURN_IF_CUDA_ERR(FUNC) \ - { \ - const cudaError_t result = FUNC; \ - if (result != cudaSuccess) { \ - return cb::Error( \ - "CUDA exception (line " + std::to_string(__LINE__) + \ - "): " + cudaGetErrorName(result) + " (" + \ - cudaGetErrorString(result) + ")", \ - pa::GENERIC_ERROR); \ - } \ - } - -cb::Error -CreateCUDAIPCHandle( - cudaIpcMemHandle_t* cuda_handle, void* input_d_ptr, int device_id = 0) -{ - // Set the GPU device to the desired GPU - RETURN_IF_CUDA_ERR(cudaSetDevice(device_id)); - - // Create IPC handle for data on the gpu - RETURN_IF_CUDA_ERR(cudaIpcGetMemHandle(cuda_handle, input_d_ptr)); - - return cb::Error::Success; -} - -#endif // TRITON_ENABLE_GPU - -} // namespace - -/// Holds information about the shared memory locations -struct SharedMemoryData { - SharedMemoryData( - size_t byte_size, - std::unique_ptr> data) - : byte_size_(byte_size), data_(std::move(data)) - { - } - - SharedMemoryData() {} - - // Byte size - size_t byte_size_; - - // Unique pointer holding the shared memory data - std::unique_ptr> data_; -}; - -/// Manages infer data to prepare an inference request and the resulting -/// inference output from triton server -class InferDataManagerShm : public InferDataManagerBase { - public: - InferDataManagerShm( - const int32_t batch_size, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, - const std::unordered_map& - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - : shared_memory_type_(shared_memory_type), - output_shm_size_(output_shm_size), - InferDataManagerBase( - batch_size, request_parameters, parser, factory, data_loader) - { - } - - ~InferDataManagerShm(); - - /// Initialize this object. Must be called before any other functions - /// \return cb::Error object indicating success or failure. - cb::Error Init() override; - - protected: - cb::Error CreateOutputMemoryRegions(); - cb::Error CreateAndPopulateInputMemoryRegions(); - cb::Error CreateAndPopulateInputMemoryRegion( - const std::string& name, const ModelTensor& tensor, int stream_id, - int step_id); - - /// Create a memory region. - /// \return cb::Error object indicating success or failure. 
- cb::Error CreateMemoryRegion( - const std::string& shm_region_name, const SharedMemoryType& memory_type, - const size_t byte_size, void** ptr); - - /// \brief Helper function to handle copying shared memory to the correct - /// memory region - /// \param input_shm_ptr Pointer to the shared memory for a specific input - /// \param input_datas The TensorDatas to be copied - /// \param is_shape_tensor Is the input a shape tensor - /// \param region_name Name of the shared memory region - /// \return cb::Error object indicating success or failure - virtual cb::Error CopySharedMemory( - uint8_t* input_shm_ptr, const std::vector& input_datas, - bool is_shape_tensor, std::string& region_name); - - cb::Error InitInferDataInput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) override; - - cb::Error InitInferDataOutput( - const std::string& name, const ModelTensor& model_tensor, - InferData& infer_data) override; - - /// Helper function to update the inputs - /// \param thread_id The ID of the calling thread - /// \param stream_index The data stream to use for next data - /// \param step_index The step index to use for next data - /// \param infer_data The target InferData object - /// \return cb::Error object indicating success or failure. - virtual cb::Error UpdateInputs( - size_t thread_id, const int stream_index, const int step_index, - InferData& infer_data) override; - - SharedMemoryType shared_memory_type_; - size_t output_shm_size_; - // Map from shared memory key to its starting address and size - std::unordered_map shared_memory_regions_; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc deleted file mode 100644 index a36f51c10..000000000 --- a/src/c++/perf_analyzer/inference_profiler.cc +++ /dev/null @@ -1,1867 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
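Editor's note: the SharedMemoryData struct removed above pairs each region's byte size with a uniquely owned buffer whose deleter is chosen per allocation path (free for C-API heap buffers, cudaFree for CUDA regions, a no-op for regions unmapped elsewhere), so the destructor can release everything uniformly. Below is a small standalone sketch of that owner-with-custom-deleter idea, using malloc/free and a static buffer in place of the real shared-memory and CUDA paths; the Region name and region keys are illustrative only.

// Hypothetical sketch of "size + owned buffer with a per-region deleter".
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct Region {
  size_t byte_size{0};
  std::unique_ptr<uint8_t, std::function<void(uint8_t*)>> data;
};

int main() {
  std::unordered_map<std::string, Region> regions;

  // Heap-backed region: the deleter calls free(), mirroring the branch that
  // allocates plain host memory and registers it with the backend.
  uint8_t* raw = static_cast<uint8_t*>(std::malloc(64));
  regions.emplace(
      "INPUT0_0_0", Region{64, {raw, [](uint8_t* p) { std::free(p); }}});

  // Externally managed region: a no-op deleter, mirroring the mapped
  // system-shared-memory branch where unmapping/unlinking happens elsewhere.
  static uint8_t external[32];
  regions.emplace(
      "OUTPUT0", Region{sizeof(external), {external, [](uint8_t*) {}}});

  for (const auto& [name, region] : regions) {
    std::cout << name << ": " << region.byte_size << " bytes\n";
  }
  // Each region's deleter runs automatically when the map is destroyed.
}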
- -#include "inference_profiler.h" - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "doctest.h" - -namespace triton { namespace perfanalyzer { -cb::Error -ReportPrometheusMetrics(const Metrics& metrics) -{ - const size_t max_num_gpus_in_stdout{16}; - if (metrics.gpu_utilization_per_gpu.size() > max_num_gpus_in_stdout || - metrics.gpu_power_usage_per_gpu.size() > max_num_gpus_in_stdout || - metrics.gpu_memory_used_bytes_per_gpu.size() > max_num_gpus_in_stdout || - metrics.gpu_memory_total_bytes_per_gpu.size() > max_num_gpus_in_stdout) { - std::cout << "Too many GPUs on system to print out individual Prometheus " - "metrics, use the CSV output feature to see metrics." - << std::endl; - return cb::Error::Success; - } - - std::cout << " Avg GPU Utilization:" << std::endl; - for (const auto& gpu_uuid_metric_pair : metrics.gpu_utilization_per_gpu) { - const auto gpu_uuid{gpu_uuid_metric_pair.first}; - const auto metric{gpu_uuid_metric_pair.second}; - std::cout << " " << gpu_uuid << " : " << (metric * 100.0) << "%" - << std::endl; - } - - std::cout << " Avg GPU Power Usage:" << std::endl; - for (const auto& gpu_uuid_metric_pair : metrics.gpu_power_usage_per_gpu) { - const auto gpu_uuid{gpu_uuid_metric_pair.first}; - const auto metric{gpu_uuid_metric_pair.second}; - std::cout << " " << gpu_uuid << " : " << metric << " watts" - << std::endl; - } - - std::cout << " Max GPU Memory Usage:" << std::endl; - for (const auto& gpu_uuid_metric_pair : - metrics.gpu_memory_used_bytes_per_gpu) { - const auto gpu_uuid{gpu_uuid_metric_pair.first}; - const auto metric{gpu_uuid_metric_pair.second}; - std::cout << " " << gpu_uuid << " : " << metric << " bytes" - << std::endl; - } - - std::cout << " Total GPU Memory:" << std::endl; - for (const auto& gpu_uuid_metric_pair : - metrics.gpu_memory_total_bytes_per_gpu) { - const auto gpu_uuid{gpu_uuid_metric_pair.first}; - const auto metric{gpu_uuid_metric_pair.second}; - std::cout << " " << gpu_uuid << " : " << metric << " bytes" - << std::endl; - } - - return cb::Error::Success; -} - -namespace { - -inline uint64_t -AverageDurationInUs(const uint64_t total_time_in_ns, const uint64_t cnt) -{ - if (cnt == 0) { - return 0; - } - return total_time_in_ns / (cnt * 1000); -} - -EnsembleDurations -GetTotalEnsembleDurations(const ServerSideStats& stats) -{ - EnsembleDurations result; - // Calculate avg cache hit latency and cache miss latency for ensemble model - // in case top level response caching is enabled. 
- const uint64_t ensemble_cache_hit_cnt = stats.cache_hit_count; - const uint64_t ensemble_cache_miss_cnt = stats.cache_miss_count; - result.total_cache_hit_time_avg_us += - AverageDurationInUs(stats.cache_hit_time_ns, ensemble_cache_hit_cnt); - result.total_cache_miss_time_avg_us += - AverageDurationInUs(stats.cache_miss_time_ns, ensemble_cache_miss_cnt); - for (const auto& model_stats : stats.composing_models_stat) { - if (model_stats.second.composing_models_stat.empty()) { - // Cache hit count covers cache hits, not related to compute times - const uint64_t cache_hit_cnt = model_stats.second.cache_hit_count; - // cache_miss_cnt should either equal infer_cnt or be zero if - // cache is disabled or not supported for the model/scheduler type - const uint64_t cache_miss_cnt = model_stats.second.cache_miss_count; - - result.total_queue_time_avg_us += AverageDurationInUs( - model_stats.second.queue_time_ns, model_stats.second.queue_count); - const uint64_t compute_time = model_stats.second.compute_input_time_ns + - model_stats.second.compute_infer_time_ns + - model_stats.second.compute_output_time_ns; - if (model_stats.second.compute_input_count != - model_stats.second.compute_infer_count || - model_stats.second.compute_infer_count != - model_stats.second.compute_output_count) { - throw std::runtime_error( - "Server side statistics compute counts must be the same."); - } - const uint64_t compute_cnt = model_stats.second.compute_input_count; - result.total_compute_time_avg_us += - AverageDurationInUs(compute_time, compute_cnt); - result.total_cache_hit_time_avg_us += AverageDurationInUs( - model_stats.second.cache_hit_time_ns, cache_hit_cnt); - result.total_cache_miss_time_avg_us += AverageDurationInUs( - model_stats.second.cache_miss_time_ns, cache_miss_cnt); - // Track combined cache/compute total avg for reporting latency with cache - // enabled - result.total_combined_cache_compute_time_avg_us += AverageDurationInUs( - compute_time + model_stats.second.cache_hit_time_ns + - model_stats.second.cache_miss_time_ns, - compute_cnt + cache_hit_cnt); - } else { - const auto this_ensemble_duration = - GetTotalEnsembleDurations(model_stats.second); - result.total_queue_time_avg_us += - this_ensemble_duration.total_queue_time_avg_us; - result.total_compute_time_avg_us += - this_ensemble_duration.total_compute_time_avg_us; - result.total_cache_hit_time_avg_us += - this_ensemble_duration.total_cache_hit_time_avg_us; - result.total_cache_miss_time_avg_us += - this_ensemble_duration.total_cache_miss_time_avg_us; - result.total_combined_cache_compute_time_avg_us += - this_ensemble_duration.total_combined_cache_compute_time_avg_us; - } - } - return result; -} - - -size_t -GetOverheadDuration(size_t total_time, size_t queue_time, size_t compute_time) -{ - return (total_time > queue_time + compute_time) - ? 
(total_time - queue_time - compute_time) - : 0; -} - -cb::Error -ReportServerSideStats( - const ServerSideStats& stats, const int iteration, - const std::shared_ptr& parser) -{ - const std::string ident = std::string(2 * iteration, ' '); - - // Infer/exec counts cover compute time done in inference backends, - // not related to cache hit times - const uint64_t exec_cnt = stats.execution_count; - const uint64_t infer_cnt = stats.inference_count; - // Cache hit count covers cache hits, not related to compute times - const uint64_t cache_hit_cnt = stats.cache_hit_count; - const uint64_t cache_miss_cnt = stats.cache_miss_count; - - // Success count covers all successful requests, cumulative time, queue - // time, compute, and cache - const uint64_t cnt = stats.success_count; - if (cnt == 0) { - std::cout << ident << " Request count: " << cnt << std::endl; - return cb::Error::Success; - } - - const uint64_t cumm_avg_us = AverageDurationInUs(stats.cumm_time_ns, cnt); - - std::cout << ident << " Inference count: " << infer_cnt << std::endl - << ident << " Execution count: " << exec_cnt << std::endl; - if (parser->ResponseCacheEnabled()) { - std::cout << ident << " Cache hit count: " << cache_hit_cnt << std::endl; - std::cout << ident << " Cache miss count: " << cache_miss_cnt << std::endl; - } - std::cout << ident << " Successful request count: " << cnt << std::endl - << ident << " Avg request latency: " << cumm_avg_us << " usec"; - - // Non-ensemble model - if (stats.composing_models_stat.empty()) { - const uint64_t queue_avg_us = - AverageDurationInUs(stats.queue_time_ns, stats.queue_count); - const uint64_t compute_input_avg_us = AverageDurationInUs( - stats.compute_input_time_ns, stats.compute_input_count); - const uint64_t compute_infer_avg_us = AverageDurationInUs( - stats.compute_infer_time_ns, stats.compute_infer_count); - const uint64_t compute_output_avg_us = AverageDurationInUs( - stats.compute_output_time_ns, stats.compute_output_count); - const uint64_t compute_time = stats.compute_input_time_ns + - stats.compute_infer_time_ns + - stats.compute_output_time_ns; - if (stats.compute_input_count != stats.compute_infer_count || - stats.compute_infer_count != stats.compute_output_count) { - throw std::runtime_error( - "Server side statistics compute counts must be the same."); - } - const uint64_t compute_cnt = stats.compute_input_count; - const uint64_t compute_avg_us = - AverageDurationInUs(compute_time, compute_cnt); - const uint64_t cache_hit_avg_us = - AverageDurationInUs(stats.cache_hit_time_ns, cache_hit_cnt); - const uint64_t cache_miss_avg_us = - AverageDurationInUs(stats.cache_miss_time_ns, cache_miss_cnt); - const uint64_t total_compute_time_ns = stats.compute_input_time_ns + - stats.compute_infer_time_ns + - stats.compute_output_time_ns; - // Get the average of cache hits and misses across successful requests - const uint64_t combined_cache_compute_avg_us = AverageDurationInUs( - stats.cache_hit_time_ns + stats.cache_miss_time_ns + - total_compute_time_ns, - compute_cnt + cache_hit_cnt); - - if (parser->ResponseCacheEnabled()) { - const uint64_t overhead_avg_us = GetOverheadDuration( - cumm_avg_us, queue_avg_us, combined_cache_compute_avg_us); - std::cout << " (overhead " << overhead_avg_us << " usec + " - << "queue " << queue_avg_us << " usec + " - << "cache hit/miss " << combined_cache_compute_avg_us - << " usec)" << std::endl; - std::cout << ident << ident - << " Average Cache Hit Latency: " << cache_hit_avg_us - << " usec" << std::endl; - std::cout << ident << ident << " 
Average Cache Miss Latency: " - << cache_miss_avg_us + compute_avg_us << " usec " - << "(cache lookup/insertion " << cache_miss_avg_us << " usec + " - << "compute input " << compute_input_avg_us << " usec + " - << "compute infer " << compute_infer_avg_us << " usec + " - << "compute output " << compute_output_avg_us << " usec)" - << std::endl - << std::endl; - } - // Response Cache Disabled - else { - std::cout << " (overhead " - << GetOverheadDuration( - cumm_avg_us, queue_avg_us, compute_avg_us) - << " usec + " - << "queue " << queue_avg_us << " usec + " - << "compute input " << compute_input_avg_us << " usec + " - << "compute infer " << compute_infer_avg_us << " usec + " - << "compute output " << compute_output_avg_us << " usec)" - << std::endl - << std::endl; - - if (cache_hit_avg_us > 0 || cache_miss_avg_us > 0) { - std::cerr << "Response Cache is disabled for model [" - << parser->ModelName() - << "] but cache hit/miss latency is non-zero." << std::endl; - } - } - } - // Ensemble Model - else { - const auto ensemble_times = GetTotalEnsembleDurations(stats); - // Response Cache Enabled - if (parser->ResponseCacheEnabled()) { - const uint64_t overhead_avg_us = GetOverheadDuration( - cumm_avg_us, ensemble_times.total_queue_time_avg_us, - ensemble_times.total_combined_cache_compute_time_avg_us); - // FIXME - Refactor these calculations in case of ensemble top level - // response cache is enabled - if (!parser->TopLevelResponseCachingEnabled()) { - std::cout << " (overhead " << overhead_avg_us << " usec + " - << "queue " << ensemble_times.total_queue_time_avg_us - << " usec + " - << "cache hit/miss " - << ensemble_times.total_combined_cache_compute_time_avg_us - << " usec)" << std::endl; - } else { - std::cout << std::endl; - } - std::cout << ident << ident << " Average Cache Hit Latency: " - << ensemble_times.total_cache_hit_time_avg_us << " usec" - << std::endl; - std::cout << ident << ident << " Average Cache Miss Latency: " - << ensemble_times.total_cache_miss_time_avg_us + - ensemble_times.total_compute_time_avg_us - << " usec " << std::endl - << std::endl; - } - // Response Cache Disabled - else { - std::cout << " (overhead " - << GetOverheadDuration( - cumm_avg_us, ensemble_times.total_queue_time_avg_us, - ensemble_times.total_compute_time_avg_us) - << " usec + " - << "queue " << ensemble_times.total_queue_time_avg_us - << " usec + " - << "compute " << ensemble_times.total_compute_time_avg_us - << " usec)" << std::endl - << std::endl; - } - - // List out composing models of ensemble model - std::cout << ident << "Composing models: " << std::endl; - for (const auto& model_stats : stats.composing_models_stat) { - const auto& model_identifier = model_stats.first; - std::cout << ident << model_identifier.first - << ", version: " << model_identifier.second << std::endl; - ReportServerSideStats(model_stats.second, iteration + 1, parser); - } - } - - return cb::Error::Success; -} - -cb::Error -ReportClientSideStats( - const ClientSideStats& stats, const int64_t percentile, - const cb::ProtocolType protocol, const bool verbose, - const bool on_sequence_model, const bool include_lib_stats, - const double overhead_pct, const double send_request_rate, - const bool is_decoupled_model) -{ - const uint64_t avg_latency_us = stats.avg_latency_ns / 1000; - const uint64_t std_us = stats.std_us; - - const uint64_t avg_request_time_us = stats.avg_request_time_ns / 1000; - const uint64_t avg_send_time_us = stats.avg_send_time_ns / 1000; - const uint64_t avg_receive_time_us = stats.avg_receive_time_ns / 
1000; - const uint64_t avg_response_wait_time_us = - avg_request_time_us - avg_send_time_us - avg_receive_time_us; - - std::string client_library_detail = " "; - if (include_lib_stats) { - if (protocol == cb::ProtocolType::GRPC) { - client_library_detail += - "Avg gRPC time: " + std::to_string(avg_request_time_us) + " usec ("; - if (!verbose) { - client_library_detail += - "(un)marshal request/response " + - std::to_string(avg_send_time_us + avg_receive_time_us) + - " usec + response wait " + - std::to_string(avg_response_wait_time_us) + " usec)"; - } else { - client_library_detail += "marshal " + std::to_string(avg_send_time_us) + - " usec + response wait " + - std::to_string(avg_response_wait_time_us) + - " usec + unmarshal " + - std::to_string(avg_receive_time_us) + " usec)"; - } - } else if (protocol == cb::ProtocolType::HTTP) { - client_library_detail += - "Avg HTTP time: " + std::to_string(avg_request_time_us) + " usec ("; - if (!verbose) { - client_library_detail += - "send/recv " + - std::to_string(avg_send_time_us + avg_receive_time_us) + - " usec + response wait " + - std::to_string(avg_response_wait_time_us) + " usec)"; - } else { - client_library_detail += "send " + std::to_string(avg_send_time_us) + - " usec + response wait " + - std::to_string(avg_response_wait_time_us) + - " usec + receive " + - std::to_string(avg_receive_time_us) + " usec)"; - } - } - } - - std::cout << " Request count: " << stats.request_count << std::endl; - double delay_pct = - ((double)stats.delayed_request_count / stats.request_count) * 100; - if (delay_pct > DELAY_PCT_THRESHOLD) { - std::cout << " " - << "Avg send request rate: " << std::fixed << std::setprecision(2) - << send_request_rate << " infer/sec" << std::endl; - std::cout << " " - << "[WARNING] Perf Analyzer was not able to keep up with the " - "desired request rate. "; - std::cout << delay_pct << "% of the requests were delayed. 
" << std::endl; - } - if (on_sequence_model) { - std::cout << " Sequence count: " << stats.sequence_count << " (" - << stats.sequence_per_sec << " seq/sec)" << std::endl; - } - std::cout << " Throughput: " << stats.infer_per_sec << " infer/sec" - << std::endl; - if (is_decoupled_model) { - std::cout << " Response Throughput: " << stats.responses_per_sec - << " infer/sec" << std::endl; - } - - if (verbose) { - std::stringstream client_overhead{""}; - client_overhead << " " - << "Avg client overhead: " << std::fixed - << std::setprecision(2) << overhead_pct << "%"; - std::cout << client_overhead.str() << std::endl; - } - - if (percentile == -1) { - std::cout << " Avg latency: " << avg_latency_us << " usec" - << " (standard deviation " << std_us << " usec)" << std::endl; - } - for (const auto& percentile : stats.percentile_latency_ns) { - std::cout << " p" << percentile.first - << " latency: " << (percentile.second / 1000) << " usec" - << std::endl; - } - - std::cout << client_library_detail << std::endl; - - return cb::Error::Success; -} - -cb::Error -Report( - const PerfStatus& summary, const int64_t percentile, - const cb::ProtocolType protocol, const bool verbose, - const bool include_lib_stats, const bool include_server_stats, - const std::shared_ptr& parser, - const bool should_collect_metrics, const double overhead_pct_threshold) -{ - std::cout << " Client: " << std::endl; - ReportClientSideStats( - summary.client_stats, percentile, protocol, verbose, - summary.on_sequence_model, include_lib_stats, summary.overhead_pct, - summary.send_request_rate, parser->IsDecoupled()); - - if (include_server_stats) { - std::cout << " Server: " << std::endl; - ReportServerSideStats(summary.server_stats, 1, parser); - } - - if (should_collect_metrics) { - std::cout << " Server Prometheus Metrics: " << std::endl; - ReportPrometheusMetrics(summary.metrics.front()); - } - - if (summary.overhead_pct > overhead_pct_threshold) { - std::cout << "[WARNING] Perf Analyzer is not able to keep up with the " - "desired load. The results may not be accurate." 
- << std::endl; - } - return cb::Error::Success; -} - -} // namespace - -cb::Error -InferenceProfiler::Create( - const bool verbose, const double stability_threshold, - const uint64_t measurement_window_ms, const size_t max_trials, - const int64_t percentile, const uint64_t latency_threshold_ms_, - const cb::ProtocolType protocol, std::shared_ptr& parser, - std::shared_ptr profile_backend, - std::unique_ptr manager, - std::unique_ptr* profiler, - uint64_t measurement_request_count, MeasurementMode measurement_mode, - std::shared_ptr mpi_driver, const uint64_t metrics_interval_ms, - const bool should_collect_metrics, const double overhead_pct_threshold, - const bool async_mode, - const std::shared_ptr collector, - const bool should_collect_profile_data) -{ - std::unique_ptr local_profiler(new InferenceProfiler( - verbose, stability_threshold, measurement_window_ms, max_trials, - (percentile != -1), percentile, latency_threshold_ms_, protocol, parser, - profile_backend, std::move(manager), measurement_request_count, - measurement_mode, mpi_driver, metrics_interval_ms, should_collect_metrics, - overhead_pct_threshold, async_mode, collector, - should_collect_profile_data)); - - *profiler = std::move(local_profiler); - return cb::Error::Success; -} - -InferenceProfiler::InferenceProfiler( - const bool verbose, const double stability_threshold, - const int32_t measurement_window_ms, const size_t max_trials, - const bool extra_percentile, const size_t percentile, - const uint64_t latency_threshold_ms_, const cb::ProtocolType protocol, - std::shared_ptr& parser, - std::shared_ptr profile_backend, - std::unique_ptr manager, uint64_t measurement_request_count, - MeasurementMode measurement_mode, std::shared_ptr mpi_driver, - const uint64_t metrics_interval_ms, const bool should_collect_metrics, - const double overhead_pct_threshold, const bool async_mode, - const std::shared_ptr collector, - const bool should_collect_profile_data) - : verbose_(verbose), measurement_window_ms_(measurement_window_ms), - max_trials_(max_trials), extra_percentile_(extra_percentile), - percentile_(percentile), latency_threshold_ms_(latency_threshold_ms_), - protocol_(protocol), parser_(parser), profile_backend_(profile_backend), - manager_(std::move(manager)), - measurement_request_count_(measurement_request_count), - measurement_mode_(measurement_mode), mpi_driver_(mpi_driver), - should_collect_metrics_(should_collect_metrics), - overhead_pct_threshold_(overhead_pct_threshold), async_mode_(async_mode), - collector_(collector), - should_collect_profile_data_(should_collect_profile_data) -{ - load_parameters_.stability_threshold = stability_threshold; - load_parameters_.stability_window = 3; - if (profile_backend_->Kind() == cb::BackendKind::TRITON || - profile_backend_->Kind() == cb::BackendKind::TRITON_C_API) { - // Measure and report client library stats only when the model - // is not decoupled. - include_lib_stats_ = (!parser_->IsDecoupled()); - // Measure and report server statistics only when the server - // supports the statistics extension. 
- std::set extensions; - profile_backend_->ServerExtensions(&extensions); - include_server_stats_ = (extensions.find("statistics") != extensions.end()); - } else { - include_lib_stats_ = true; - include_server_stats_ = false; - } - if (should_collect_metrics_) { - metrics_manager_ = - std::make_shared(profile_backend, metrics_interval_ms); - } -} - -cb::Error -InferenceProfiler::Profile( - const size_t concurrent_request_count, const size_t request_count, - std::vector& perf_statuses, bool& meets_threshold, - bool& is_stable) -{ - cb::Error err; - PerfStatus perf_status{}; - - perf_status.concurrency = concurrent_request_count; - - is_stable = false; - meets_threshold = true; - - RETURN_IF_ERROR( - dynamic_cast(manager_.get()) - ->ChangeConcurrencyLevel(concurrent_request_count, request_count)); - - err = ProfileHelper(perf_status, request_count, &is_stable); - if (err.IsOk()) { - uint64_t stabilizing_latency_ms = - perf_status.stabilizing_latency_ns / NANOS_PER_MILLIS; - if ((stabilizing_latency_ms >= latency_threshold_ms_) && - (latency_threshold_ms_ != NO_LIMIT)) { - std::cerr << "Measured latency went over the set limit of " - << latency_threshold_ms_ << " msec. " << std::endl; - meets_threshold = false; - } else if (!is_stable) { - if (measurement_mode_ == MeasurementMode::TIME_WINDOWS) { - std::cerr << "Failed to obtain stable measurement within " - << max_trials_ << " measurement windows for concurrency " - << concurrent_request_count << ". Please try to " - << "increase the --measurement-interval." << std::endl; - } else if (measurement_mode_ == MeasurementMode::COUNT_WINDOWS) { - std::cerr << "Failed to obtain stable measurement within " - << max_trials_ << " measurement windows for concurrency " - << concurrent_request_count << ". Please try to " - << "increase the --measurement-request-count." << std::endl; - } - meets_threshold = false; - } else { - perf_statuses.push_back(perf_status); - err = Report( - perf_status, percentile_, protocol_, verbose_, include_lib_stats_, - include_server_stats_, parser_, should_collect_metrics_, - overhead_pct_threshold_); - if (!err.IsOk()) { - std::cerr << err; - meets_threshold = false; - } - } - } else { - return err; - } - - return cb::Error::Success; -} - -cb::Error -InferenceProfiler::Profile( - const double request_rate, const size_t request_count, - std::vector& perf_statuses, bool& meets_threshold, - bool& is_stable) -{ - cb::Error err; - PerfStatus perf_status{}; - - perf_status.request_rate = request_rate; - - is_stable = false; - meets_threshold = true; - - RETURN_IF_ERROR(dynamic_cast(manager_.get()) - ->ChangeRequestRate(request_rate, request_count)); - std::cout << "Request Rate: " << request_rate - << " inference requests per seconds" << std::endl; - - err = ProfileHelper(perf_status, request_count, &is_stable); - if (err.IsOk()) { - uint64_t stabilizing_latency_ms = - perf_status.stabilizing_latency_ns / NANOS_PER_MILLIS; - if ((stabilizing_latency_ms >= latency_threshold_ms_) && - (latency_threshold_ms_ != NO_LIMIT)) { - std::cerr << "Measured latency went over the set limit of " - << latency_threshold_ms_ << " msec. " << std::endl; - meets_threshold = false; - } else if (!is_stable) { - std::cerr << "Failed to obtain stable measurement." 
<< std::endl; - meets_threshold = false; - } else { - perf_statuses.push_back(perf_status); - err = Report( - perf_status, percentile_, protocol_, verbose_, include_lib_stats_, - include_server_stats_, parser_, should_collect_metrics_, - overhead_pct_threshold_); - if (!err.IsOk()) { - std::cerr << err; - meets_threshold = false; - } - } - } else { - return err; - } - - return cb::Error::Success; -} - -cb::Error -InferenceProfiler::Profile( - const size_t request_count, std::vector& perf_statuses, - bool& meets_threshold, bool& is_stable) -{ - cb::Error err; - PerfStatus perf_status{}; - - RETURN_IF_ERROR(dynamic_cast(manager_.get()) - ->InitCustomIntervals(request_count)); - RETURN_IF_ERROR(dynamic_cast(manager_.get()) - ->GetCustomRequestRate(&perf_status.request_rate)); - - is_stable = false; - meets_threshold = true; - - err = ProfileHelper(perf_status, request_count, &is_stable); - if (err.IsOk()) { - uint64_t stabilizing_latency_ms = - perf_status.stabilizing_latency_ns / NANOS_PER_MILLIS; - if ((stabilizing_latency_ms >= latency_threshold_ms_) && - (latency_threshold_ms_ != NO_LIMIT)) { - std::cerr << "Measured latency went over the set limit of " - << latency_threshold_ms_ << " msec. " << std::endl; - meets_threshold = false; - } else if (!is_stable) { - std::cerr << "Failed to obtain stable measurement." << std::endl; - meets_threshold = false; - } else { - perf_statuses.push_back(perf_status); - err = Report( - perf_status, percentile_, protocol_, verbose_, include_lib_stats_, - include_server_stats_, parser_, should_collect_metrics_, - overhead_pct_threshold_); - if (!err.IsOk()) { - std::cerr << err; - meets_threshold = false; - } - } - } else { - return err; - } - - return cb::Error::Success; -} - -cb::Error -InferenceProfiler::ProfileHelper( - PerfStatus& experiment_perf_status, size_t request_count, bool* is_stable) -{ - // Start measurement - LoadStatus load_status; - size_t completed_trials = 0; - std::queue error; - std::deque measurement_perf_statuses; - all_request_records_.clear(); - previous_window_end_ns_ = 0; - - // Start with a fresh empty request records vector in the manager - // - std::vector empty_request_records; - RETURN_IF_ERROR(manager_->SwapRequestRecords(empty_request_records)); - - do { - PerfStatus measurement_perf_status; - measurement_perf_status.concurrency = experiment_perf_status.concurrency; - measurement_perf_status.request_rate = experiment_perf_status.request_rate; - RETURN_IF_ERROR(manager_->CheckHealth()); - - MeasureConfig measure_config; - if (measurement_mode_ == MeasurementMode::TIME_WINDOWS) { - measure_config.measurement_window = measurement_window_ms_; - measure_config.is_count_based = false; - } else { - measure_config.measurement_window = measurement_request_count_; - measure_config.is_count_based = true; - } - - // When request_count is not 0, the experiment will run for exactly X - // requests. 
In that case, we are not measuring based on window stability, - // and instead need to clamp the windows to be from the start of the - // first request to the end of the last request of the request count - // - measure_config.clamp_window = (request_count != 0); - error.push(Measure(measurement_perf_status, measure_config)); - measurement_perf_statuses.push_back(measurement_perf_status); - - if (error.size() > load_parameters_.stability_window) { - error.pop(); - measurement_perf_statuses.pop_front(); - } - - if (error.back().IsOk()) { - load_status.infer_per_sec.push_back( - measurement_perf_status.client_stats.infer_per_sec); - load_status.latencies.push_back( - measurement_perf_status.stabilizing_latency_ns); - } else { - load_status.infer_per_sec.push_back(0); - load_status.latencies.push_back(std::numeric_limits::max()); - } - - load_status.avg_ips += - load_status.infer_per_sec.back() / load_parameters_.stability_window; - load_status.avg_latency += - load_status.latencies.back() / load_parameters_.stability_window; - if (verbose_) { - if (error.back().IsOk()) { - std::cout << " Pass [" << (completed_trials + 1) - << "] throughput: " << load_status.infer_per_sec.back() - << " infer/sec. "; - if (extra_percentile_) { - std::cout << "p" << percentile_ << " latency: " - << (measurement_perf_status.client_stats - .percentile_latency_ns.find(percentile_) - ->second / - 1000) - << " usec" << std::endl; - } else { - std::cout << "Avg latency: " - << (measurement_perf_status.client_stats.avg_latency_ns / - 1000) - << " usec (std " - << measurement_perf_status.client_stats.std_us << " usec). " - << std::endl; - } - } else { - std::cout << " Pass [" << (completed_trials + 1) - << "] cb::Error: " << error.back().Message() << std::endl; - } - } - - // If request-count is specified, then only measure one window and exit - if (request_count != 0) { - *is_stable = true; - break; - } - - *is_stable = DetermineStability(load_status); - - if (IsDoneProfiling(load_status, is_stable)) { - break; - } - - completed_trials++; - } while ((!early_exit) && (completed_trials < max_trials_)); - - // For async requests, print a warning if the latency threshold is not met. - if (async_mode_ && !*is_stable && DetermineStability(load_status, false)) { - std::cerr << "Warning: Request latency is not stabilizing. " - "Please try lowering the request rate." - << std::endl; - *is_stable = true; - } - - if (should_collect_metrics_) { - metrics_manager_->StopQueryingMetrics(); - } - - // return the appropriate error which might have occurred in the - // stability_window for its proper handling. - while (!error.empty()) { - if (!error.front().IsOk()) { - return error.front(); - } else { - error.pop(); - } - } - - // Only merge the results if the results have stabilized. 
- if (*is_stable) { - RETURN_IF_ERROR(MergePerfStatusReports( - measurement_perf_statuses, experiment_perf_status)); - } - - if (early_exit) { - return cb::Error("Received exit signal.", pa::GENERIC_ERROR); - } - return cb::Error::Success; -} - -bool -InferenceProfiler::DetermineStability( - LoadStatus& load_status, bool check_latency) -{ - bool stable = false; - if (load_status.infer_per_sec.size() >= load_parameters_.stability_window) { - stable = true; - size_t idx = - load_status.infer_per_sec.size() - load_parameters_.stability_window; - - for (size_t i = idx; i < load_status.infer_per_sec.size(); i++) { - if (load_status.infer_per_sec[i] == 0) { - stable = false; - } - } - - stable = stable && CheckWindowForStability(idx, load_status, check_latency); - } - return stable; -} - -bool -InferenceProfiler::CheckWindowForStability( - size_t idx, LoadStatus& load_status, bool check_latency) -{ - return IsInferWindowStable(idx, load_status) && - (!check_latency || IsLatencyWindowStable(idx, load_status)); -} - -bool -InferenceProfiler::IsInferWindowStable(size_t idx, LoadStatus& load_status) -{ - auto infer_start = std::begin(load_status.infer_per_sec) + idx; - auto infer_per_sec_measurements = std::minmax_element( - infer_start, infer_start + load_parameters_.stability_window); - - auto max_infer_per_sec = *infer_per_sec_measurements.second; - auto min_infer_per_sec = *infer_per_sec_measurements.first; - - return max_infer_per_sec / min_infer_per_sec <= - 1 + load_parameters_.stability_threshold; -} - -bool -InferenceProfiler::IsLatencyWindowStable(size_t idx, LoadStatus& load_status) -{ - auto latency_start = std::begin(load_status.latencies) + idx; - auto latencies_per_sec_measurements = std::minmax_element( - latency_start, latency_start + load_parameters_.stability_window); - - double max_latency = *latencies_per_sec_measurements.second; - double min_latency = *latencies_per_sec_measurements.first; - - auto is_stable = - max_latency / min_latency <= 1 + load_parameters_.stability_threshold; - return max_latency / min_latency <= 1 + load_parameters_.stability_threshold; -} - -bool -InferenceProfiler::IsDoneProfiling(LoadStatus& load_status, bool* is_stable) -{ - bool done = false; - bool within_threshold = true; - if (load_status.infer_per_sec.size() >= load_parameters_.stability_window) { - size_t idx = - load_status.infer_per_sec.size() - load_parameters_.stability_window; - - for (; idx < load_status.infer_per_sec.size(); idx++) { - within_threshold &= CheckWithinThreshold(idx, load_status); - } - } - - if (mpi_driver_->IsMPIRun()) { - if (AllMPIRanksAreStable(*is_stable)) { - done = true; - } - } else if (*is_stable) { - done = true; - } - if ((!within_threshold) && (latency_threshold_ms_ != NO_LIMIT)) { - done = true; - } - return done; -} - -bool -InferenceProfiler::CheckWithinThreshold(size_t idx, LoadStatus& load_status) -{ - return load_status.latencies[idx] < - (latency_threshold_ms_ * NANOS_PER_MILLIS); -} - -cb::Error -InferenceProfiler::MergeServerSideStats( - std::vector& server_side_stats, - ServerSideStats& server_side_summary) -{ - auto& server_side_stat = server_side_stats[0]; - - // Make sure that the perf status reports profiling settings match with each - // other. 
- for (size_t i = 1; i < server_side_stats.size(); i++) { - if (server_side_stats[i].composing_models_stat.size() != - server_side_stat.composing_models_stat.size()) { - return cb::Error( - "Inconsistent ensemble setting detected between the trials.", - pa::GENERIC_ERROR); - } - } - - // Initialize the server stats for the merged report. - server_side_summary.inference_count = 0; - server_side_summary.execution_count = 0; - server_side_summary.cache_hit_count = 0; - server_side_summary.cache_miss_count = 0; - server_side_summary.success_count = 0; - server_side_summary.queue_count = 0; - server_side_summary.compute_input_count = 0; - server_side_summary.compute_output_count = 0; - server_side_summary.compute_infer_count = 0; - server_side_summary.cumm_time_ns = 0; - server_side_summary.queue_time_ns = 0; - server_side_summary.compute_input_time_ns = 0; - server_side_summary.compute_infer_time_ns = 0; - server_side_summary.compute_output_time_ns = 0; - server_side_summary.cache_hit_time_ns = 0; - server_side_summary.cache_miss_time_ns = 0; - server_side_summary.composing_models_stat.clear(); - for (auto& composing_model_stat : server_side_stat.composing_models_stat) { - std::vector composing_model_stats; - for (auto& server_side_stat : server_side_stats) { - composing_model_stats.push_back( - server_side_stat.composing_models_stat[composing_model_stat.first]); - } - - ServerSideStats merged_composing_model_stats; - RETURN_IF_ERROR(MergeServerSideStats( - composing_model_stats, merged_composing_model_stats)); - server_side_summary.composing_models_stat.insert( - {composing_model_stat.first, merged_composing_model_stats}); - } - - for (auto& server_side_stat : server_side_stats) { - // Aggregated Server Stats - server_side_summary.inference_count += server_side_stat.inference_count; - server_side_summary.execution_count += server_side_stat.execution_count; - server_side_summary.cache_hit_count += server_side_stat.cache_hit_count; - server_side_summary.cache_miss_count += server_side_stat.cache_miss_count; - server_side_summary.success_count += server_side_stat.success_count; - server_side_summary.queue_count += server_side_stat.queue_count; - server_side_summary.compute_input_count += - server_side_stat.compute_input_count; - server_side_summary.compute_infer_count += - server_side_stat.compute_infer_count; - server_side_summary.compute_output_count += - server_side_stat.compute_output_count; - server_side_summary.cumm_time_ns += server_side_stat.cumm_time_ns; - server_side_summary.queue_time_ns += server_side_stat.queue_time_ns; - server_side_summary.compute_input_time_ns += - server_side_stat.compute_input_time_ns; - server_side_summary.compute_infer_time_ns += - server_side_stat.compute_infer_time_ns; - server_side_summary.compute_output_time_ns += - server_side_stat.compute_output_time_ns; - server_side_summary.cache_hit_time_ns += server_side_stat.cache_hit_time_ns; - server_side_summary.cache_miss_time_ns += - server_side_stat.cache_miss_time_ns; - } - - return cb::Error::Success; -} - -cb::Error -InferenceProfiler::MergePerfStatusReports( - std::deque& perf_status_reports, - PerfStatus& experiment_perf_status) -{ - auto& perf_status = perf_status_reports[0]; - - // Make sure that the perf status reports profiling settings match with each - // other. 
- for (size_t i = 1; i < perf_status_reports.size(); i++) { - perf_status.concurrency = experiment_perf_status.concurrency; - perf_status.request_rate = experiment_perf_status.request_rate; - - if (perf_status_reports[i].on_sequence_model != - perf_status.on_sequence_model) { - return cb::Error( - "Inconsistent sequence setting detected.", pa::GENERIC_ERROR); - } - - if (perf_status_reports[i].batch_size != perf_status.batch_size) { - return cb::Error("Inconsistent batch size detected.", pa::GENERIC_ERROR); - } - - if (perf_status_reports[i].server_stats.composing_models_stat.size() != - perf_status.server_stats.composing_models_stat.size()) { - return cb::Error( - "Inconsistent ensemble setting detected between the trials.", - pa::GENERIC_ERROR); - } - } - - experiment_perf_status.batch_size = perf_status.batch_size; - experiment_perf_status.on_sequence_model = perf_status.on_sequence_model; - - // Initialize the client stats for the merged report. - experiment_perf_status.client_stats.request_count = 0; - experiment_perf_status.client_stats.sequence_count = 0; - experiment_perf_status.client_stats.delayed_request_count = 0; - experiment_perf_status.client_stats.duration_ns = 0; - experiment_perf_status.client_stats.avg_latency_ns = 0; - experiment_perf_status.client_stats.percentile_latency_ns.clear(); - experiment_perf_status.client_stats.latencies.clear(); - experiment_perf_status.client_stats.std_us = 0; - experiment_perf_status.client_stats.avg_request_time_ns = 0; - experiment_perf_status.client_stats.avg_send_time_ns = 0; - experiment_perf_status.client_stats.avg_receive_time_ns = 0; - experiment_perf_status.client_stats.infer_per_sec = 0; - experiment_perf_status.client_stats.sequence_per_sec = 0; - experiment_perf_status.client_stats.completed_count = 0; - experiment_perf_status.stabilizing_latency_ns = 0; - experiment_perf_status.overhead_pct = 0; - experiment_perf_status.send_request_rate = 0.0; - - std::vector server_side_stats; - for (auto& perf_status : perf_status_reports) { - // Aggregated Client Stats - experiment_perf_status.client_stats.request_count += - perf_status.client_stats.request_count; - experiment_perf_status.client_stats.sequence_count += - perf_status.client_stats.sequence_count; - experiment_perf_status.client_stats.delayed_request_count += - perf_status.client_stats.delayed_request_count; - experiment_perf_status.client_stats.response_count += - perf_status.client_stats.response_count; - experiment_perf_status.client_stats.duration_ns += - perf_status.client_stats.duration_ns; - - server_side_stats.push_back(perf_status.server_stats); - - experiment_perf_status.client_stats.latencies.insert( - experiment_perf_status.client_stats.latencies.end(), - perf_status.client_stats.latencies.begin(), - perf_status.client_stats.latencies.end()); - // Accumulate the overhead percentage and send rate here to remove extra - // traversals over the perf_status_reports - experiment_perf_status.overhead_pct += perf_status.overhead_pct; - experiment_perf_status.send_request_rate += perf_status.send_request_rate; - } - - // Calculate the average overhead_pct for the experiment. 
- experiment_perf_status.overhead_pct /= perf_status_reports.size(); - experiment_perf_status.send_request_rate /= perf_status_reports.size(); - - if (include_lib_stats_) { - for (auto& perf_status : perf_status_reports) { - experiment_perf_status.client_stats.completed_count += - perf_status.client_stats.completed_count; - - experiment_perf_status.client_stats.avg_request_time_ns += - perf_status.client_stats.avg_request_time_ns * - perf_status.client_stats.completed_count; - - experiment_perf_status.client_stats.avg_send_time_ns += - perf_status.client_stats.avg_send_time_ns * - perf_status.client_stats.completed_count; - - experiment_perf_status.client_stats.avg_receive_time_ns += - perf_status.client_stats.avg_receive_time_ns * - perf_status.client_stats.completed_count; - } - - if (experiment_perf_status.client_stats.completed_count != 0) { - experiment_perf_status.client_stats.avg_request_time_ns = - experiment_perf_status.client_stats.avg_request_time_ns / - experiment_perf_status.client_stats.completed_count; - - experiment_perf_status.client_stats.avg_send_time_ns = - experiment_perf_status.client_stats.avg_send_time_ns / - experiment_perf_status.client_stats.completed_count; - - experiment_perf_status.client_stats.avg_receive_time_ns = - experiment_perf_status.client_stats.avg_receive_time_ns / - experiment_perf_status.client_stats.completed_count; - } - } - - RETURN_IF_ERROR(MergeServerSideStats( - server_side_stats, experiment_perf_status.server_stats)); - - std::sort( - experiment_perf_status.client_stats.latencies.begin(), - experiment_perf_status.client_stats.latencies.end()); - - float client_duration_sec = - (float)experiment_perf_status.client_stats.duration_ns / NANOS_PER_SECOND; - experiment_perf_status.client_stats.sequence_per_sec = - experiment_perf_status.client_stats.sequence_count / client_duration_sec; - experiment_perf_status.client_stats.infer_per_sec = - (experiment_perf_status.client_stats.request_count * - experiment_perf_status.batch_size) / - client_duration_sec; - experiment_perf_status.client_stats.responses_per_sec = - experiment_perf_status.client_stats.response_count / client_duration_sec; - RETURN_IF_ERROR(SummarizeLatency( - experiment_perf_status.client_stats.latencies, experiment_perf_status)); - - if (should_collect_metrics_) { - // Put all Metric objects in a flat vector so they're easier to merge - std::vector> all_metrics{}; - std::for_each( - perf_status_reports.begin(), perf_status_reports.end(), - [&all_metrics](const PerfStatus& p) { - std::for_each( - p.metrics.begin(), p.metrics.end(), - [&all_metrics](const Metrics& m) { all_metrics.push_back(m); }); - }); - - Metrics merged_metrics{}; - RETURN_IF_ERROR(MergeMetrics(all_metrics, merged_metrics)); - experiment_perf_status.metrics.push_back(std::move(merged_metrics)); - } - - return cb::Error::Success; -} - -cb::Error -InferenceProfiler::GetServerSideStatus( - std::map* model_stats) -{ - if ((parser_->SchedulerType() == ModelParser::ENSEMBLE) || - (parser_->SchedulerType() == ModelParser::ENSEMBLE_SEQUENCE)) { - RETURN_IF_ERROR(profile_backend_->ModelInferenceStatistics(model_stats)); - } else { - RETURN_IF_ERROR(profile_backend_->ModelInferenceStatistics( - model_stats, parser_->ModelName(), parser_->ModelVersion())); - } - return cb::Error::Success; -} - -// Used for measurement -cb::Error -InferenceProfiler::Measure(PerfStatus& perf_status, MeasureConfig config) -{ - std::map start_status; - std::map end_status; - cb::InferStat start_stat; - cb::InferStat end_stat; - - 
manager_->ResetIdleTime(); - - // Set current window start time to end of previous window. For first - // measurement window, capture start time, server side stats, and client side - // stats. - uint64_t window_start_ns = previous_window_end_ns_; - start_stat = prev_client_side_stats_; - start_status = prev_server_side_stats_; - if (window_start_ns == 0) { - window_start_ns = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - if (should_collect_metrics_) { - metrics_manager_->StartQueryingMetrics(); - } - if (include_server_stats_) { - RETURN_IF_ERROR(GetServerSideStatus(&start_status)); - } - RETURN_IF_ERROR(manager_->GetAccumulatedClientStat(&start_stat)); - } - - if (should_collect_metrics_) { - try { - metrics_manager_->CheckQueryingStatus(); - } - catch (const std::exception& e) { - return cb::Error(e.what(), pa::GENERIC_ERROR); - } - } - - if (!config.is_count_based) { - // Wait for specified time interval in msec - std::this_thread::sleep_for( - std::chrono::milliseconds((uint64_t)(config.measurement_window * 1.2))); - } else { - do { - // Check the health of the worker threads. - RETURN_IF_ERROR(manager_->CheckHealth()); - - // Wait for 1s until enough samples have been collected. - std::this_thread::sleep_for(std::chrono::milliseconds((uint64_t)1000)); - } while (manager_->CountCollectedRequests() < config.measurement_window); - } - - uint64_t window_end_ns = - std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - previous_window_end_ns_ = window_end_ns; - - if (should_collect_metrics_) { - metrics_manager_->GetLatestMetrics(perf_status.metrics); - } - - // Get server status and then print report on difference between - // before and after status. - if (include_server_stats_) { - RETURN_IF_ERROR(GetServerSideStatus(&end_status)); - prev_server_side_stats_ = end_status; - } - - RETURN_IF_ERROR(manager_->GetAccumulatedClientStat(&end_stat)); - prev_client_side_stats_ = end_stat; - - std::vector current_request_records; - RETURN_IF_ERROR(manager_->SwapRequestRecords(current_request_records)); - all_request_records_.insert( - all_request_records_.end(), current_request_records.begin(), - current_request_records.end()); - - RETURN_IF_ERROR(Summarize( - start_status, end_status, start_stat, end_stat, perf_status, - window_start_ns, window_end_ns, config.clamp_window)); - - return cb::Error::Success; -} - -cb::Error -InferenceProfiler::Summarize( - const std::map& start_status, - const std::map& end_status, - const cb::InferStat& start_stat, const cb::InferStat& end_stat, - PerfStatus& summary, uint64_t window_start_ns, uint64_t window_end_ns, - bool clamp_window) -{ - size_t valid_sequence_count = 0; - size_t delayed_request_count = 0; - size_t response_count = 0; - - // Get measurement from requests that fall within the time interval - std::pair valid_range{window_start_ns, window_end_ns}; - std::vector latencies; - std::vector valid_requests{}; - ValidLatencyMeasurement( - valid_range, valid_sequence_count, delayed_request_count, &latencies, - response_count, valid_requests); - - - if (clamp_window) { - auto [start, end] = ClampWindow(valid_requests); - } - - uint64_t window_duration_ns = window_end_ns - window_start_ns; - - if (should_collect_profile_data_) { - CollectData( - summary, window_start_ns, window_end_ns, std::move(valid_requests)); - } - - RETURN_IF_ERROR(SummarizeLatency(latencies, summary)); - RETURN_IF_ERROR(SummarizeClientStat( - start_stat, end_stat, window_duration_ns, 
latencies.size(), - valid_sequence_count, delayed_request_count, response_count, summary)); - summary.client_stats.latencies = std::move(latencies); - - SummarizeOverhead(window_duration_ns, manager_->GetIdleTime(), summary); - - double window_duration_s{ - window_duration_ns / static_cast(NANOS_PER_SECOND)}; - - SummarizeSendRequestRate( - window_duration_s, manager_->GetAndResetNumSentRequests(), summary); - - if (include_server_stats_) { - RETURN_IF_ERROR(SummarizeServerStats( - start_status, end_status, &(summary.server_stats))); - } - - return cb::Error::Success; -} - -void -InferenceProfiler::ValidLatencyMeasurement( - const std::pair& valid_range, - size_t& valid_sequence_count, size_t& delayed_request_count, - std::vector* valid_latencies, size_t& response_count, - std::vector& valid_requests) -{ - valid_latencies->clear(); - valid_sequence_count = 0; - response_count = 0; - std::vector erase_indices{}; - for (size_t i = 0; i < all_request_records_.size(); i++) { - const auto& request_record = all_request_records_[i]; - uint64_t request_start_ns = CHRONO_TO_NANOS(request_record.start_time_); - uint64_t request_end_ns; - - if (request_record.has_null_last_response_ == false) { - request_end_ns = - CHRONO_TO_NANOS(request_record.response_timestamps_.back()); - } else if (request_record.response_timestamps_.size() > 1) { - size_t last_response_idx{request_record.response_timestamps_.size() - 2}; - request_end_ns = CHRONO_TO_NANOS( - request_record.response_timestamps_[last_response_idx]); - } else { - erase_indices.push_back(i); - continue; - } - - if (request_start_ns <= request_end_ns) { - // Only counting requests that end within the time interval - if ((request_end_ns >= valid_range.first) && - (request_end_ns <= valid_range.second)) { - valid_latencies->push_back(request_end_ns - request_start_ns); - response_count += request_record.response_timestamps_.size(); - if (request_record.has_null_last_response_) { - response_count--; - } - erase_indices.push_back(i); - if (request_record.sequence_end_) { - valid_sequence_count++; - } - if (request_record.delayed_) { - delayed_request_count++; - } - } - } - } - - std::for_each( - erase_indices.begin(), erase_indices.end(), - [this, &valid_requests](size_t i) { - valid_requests.push_back(std::move(this->all_request_records_[i])); - }); - - // Iterate through erase indices backwards so that erases from - // `all_request_records_` happen from the back to the front to avoid using - // wrong indices after subsequent erases - std::for_each(erase_indices.rbegin(), erase_indices.rend(), [this](size_t i) { - this->all_request_records_.erase(this->all_request_records_.begin() + i); - }); - - // Always sort measured latencies as percentile will be reported as default - std::sort(valid_latencies->begin(), valid_latencies->end()); -} - -std::pair -InferenceProfiler::ClampWindow(std::vector& requests) -{ - auto earliest_start = - std::chrono::time_point::max(); - auto latest_end = std::chrono::time_point::min(); - - for (auto x : requests) { - earliest_start = std::min(earliest_start, x.start_time_); - latest_end = std::max(latest_end, x.response_timestamps_.back()); - } - - return std::make_pair( - earliest_start.time_since_epoch().count(), - latest_end.time_since_epoch().count()); -} - - -void -InferenceProfiler::CollectData( - PerfStatus& summary, uint64_t window_start_ns, uint64_t window_end_ns, - std::vector&& request_records) -{ - InferenceLoadMode id{summary.concurrency, summary.request_rate}; - collector_->AddWindow(id, window_start_ns, 
window_end_ns);
-  collector_->AddData(id, std::move(request_records));
-}
-
-cb::Error
-InferenceProfiler::SummarizeLatency(
-    const std::vector<uint64_t>& latencies, PerfStatus& summary)
-{
-  if (latencies.size() == 0) {
-    return cb::Error(
-        "No valid requests recorded within time interval."
-        " Please use a larger time window.",
-        pa::OPTION_ERROR);
-  }
-
-  std::tie(summary.client_stats.avg_latency_ns, summary.client_stats.std_us) =
-      GetMeanAndStdDev(latencies);
-
-  // retrieve other interesting percentile
-  summary.client_stats.percentile_latency_ns.clear();
-  std::set<size_t> percentiles{50, 90, 95, 99};
-  if (extra_percentile_) {
-    percentiles.emplace(percentile_);
-  }
-
-  for (const auto percentile : percentiles) {
-    size_t index = (percentile / 100.0) * (latencies.size() - 1) + 0.5;
-    summary.client_stats.percentile_latency_ns.emplace(
-        percentile, latencies[index]);
-  }
-
-  if (extra_percentile_) {
-    summary.stabilizing_latency_ns =
-        summary.client_stats.percentile_latency_ns.find(percentile_)->second;
-  } else {
-    summary.stabilizing_latency_ns = summary.client_stats.avg_latency_ns;
-  }
-
-  return cb::Error::Success;
-}
-
-std::tuple<uint64_t, uint64_t>
-InferenceProfiler::GetMeanAndStdDev(const std::vector<uint64_t>& latencies)
-{
-  uint64_t avg_latency_ns{0};
-  uint64_t std_dev_latency_us{0};
-
-  // calculate mean of latencies
-  uint64_t tol_latency_ns{
-      std::accumulate(latencies.begin(), latencies.end(), 0ULL)};
-  avg_latency_ns = tol_latency_ns / latencies.size();
-
-  // calculate sample standard deviation of latencies
-  uint64_t sq_sum_latency_avg_diff_ns{0};
-  std::for_each(
-      latencies.begin(), latencies.end(),
-      [avg_latency_ns, &sq_sum_latency_avg_diff_ns](uint64_t l) {
-        sq_sum_latency_avg_diff_ns += static_cast<int64_t>(l - avg_latency_ns) *
-                                      static_cast<int64_t>(l - avg_latency_ns);
-      });
-  if (latencies.size() > 1) {
-    std_dev_latency_us =
-        std::sqrt(sq_sum_latency_avg_diff_ns / (latencies.size() - 1)) / 1000;
-  } else {
-    std_dev_latency_us = UINT64_MAX;
-    std::cerr << "WARNING: Pass contained only one request, so sample latency "
-                 "standard deviation will be infinity (UINT64_MAX)."
- << std::endl; - } - - - return std::make_tuple(avg_latency_ns, std_dev_latency_us); -} - -cb::Error -InferenceProfiler::SummarizeClientStat( - const cb::InferStat& start_stat, const cb::InferStat& end_stat, - const uint64_t duration_ns, const size_t valid_request_count, - const size_t valid_sequence_count, const size_t delayed_request_count, - const size_t response_count, PerfStatus& summary) -{ - summary.on_sequence_model = - ((parser_->SchedulerType() == ModelParser::SEQUENCE) || - (parser_->SchedulerType() == ModelParser::ENSEMBLE_SEQUENCE)); - summary.batch_size = std::max(manager_->BatchSize(), (size_t)1); - summary.client_stats.request_count = valid_request_count; - summary.client_stats.sequence_count = valid_sequence_count; - summary.client_stats.delayed_request_count = delayed_request_count; - summary.client_stats.response_count = response_count; - summary.client_stats.duration_ns = duration_ns; - float client_duration_sec = - (float)summary.client_stats.duration_ns / NANOS_PER_SECOND; - summary.client_stats.sequence_per_sec = - valid_sequence_count / client_duration_sec; - summary.client_stats.infer_per_sec = - (valid_request_count * summary.batch_size) / client_duration_sec; - summary.client_stats.responses_per_sec = response_count / client_duration_sec; - - if (include_lib_stats_) { - size_t completed_count = - end_stat.completed_request_count - start_stat.completed_request_count; - uint64_t request_time_ns = end_stat.cumulative_total_request_time_ns - - start_stat.cumulative_total_request_time_ns; - summary.client_stats.completed_count = completed_count; - uint64_t send_time_ns = - end_stat.cumulative_send_time_ns - start_stat.cumulative_send_time_ns; - uint64_t receive_time_ns = end_stat.cumulative_receive_time_ns - - start_stat.cumulative_receive_time_ns; - if (completed_count != 0) { - summary.client_stats.avg_request_time_ns = - request_time_ns / completed_count; - summary.client_stats.avg_send_time_ns = send_time_ns / completed_count; - summary.client_stats.avg_receive_time_ns = - receive_time_ns / completed_count; - } - } - - return cb::Error::Success; -} - -void -InferenceProfiler::SummarizeSendRequestRate( - const double window_duration_s, const size_t num_sent_requests, - PerfStatus& summary) -{ - if (window_duration_s <= 0.0) { - throw std::runtime_error("window_duration_s must be positive"); - } - - summary.send_request_rate = num_sent_requests / window_duration_s; -} - -cb::Error -InferenceProfiler::DetermineStatsModelVersion( - const cb::ModelIdentifier& model_identifier, - const std::map& start_stats, - const std::map& end_stats, - int64_t* status_model_version) -{ - // If model_version is unspecified then look in the stats to find the - // version with stats that incremented during the measurement. 
-  //
-  // If multiple versions had incremented stats, use the highest numbered one
-  // and print a warning
-  *status_model_version = -1;
-  bool multiple_found = false;
-  bool version_unspecified = model_identifier.second.empty();
-
-  if (version_unspecified) {
-    for (const auto& x : end_stats) {
-      const auto& end_id = x.first;
-      const auto& end_stat = x.second;
-
-      bool is_correct_model_name =
-          model_identifier.first.compare(end_id.first) == 0;
-
-      if (is_correct_model_name) {
-        uint64_t end_queue_count = end_stat.queue_count_;
-        uint64_t start_queue_count = 0;
-
-        const auto& itr = start_stats.find(end_id);
-        if (itr != start_stats.end()) {
-          start_queue_count = itr->second.queue_count_;
-        }
-
-        if (end_queue_count > start_queue_count) {
-          int64_t this_version = std::stoll(end_id.second);
-          if (*status_model_version != -1) {
-            multiple_found = true;
-          }
-          *status_model_version = std::max(*status_model_version, this_version);
-        }
-      }
-    }
-  } else {
-    const auto& itr = end_stats.find(model_identifier);
-    if (itr != end_stats.end()) {
-      *status_model_version = std::stoll(model_identifier.second);
-    }
-  }
-  // FIXME - Investigate why the composing model version is -1 in case of an
-  // ensemble cache hit.
-  //
-  // In case of ensemble models, if top-level response caching is enabled, the
-  // composing model versions are unavailable on a cache hit. This is because
-  // the scheduler returns the cached response and the composing models are not
-  // executed. It's a valid scenario and shouldn't raise an error.
-  bool model_version_unspecified_and_invalid =
-      *status_model_version == -1 &&
-      (parser_ == nullptr || !parser_->TopLevelResponseCachingEnabled());
-  if (model_version_unspecified_and_invalid) {
-    return cb::Error(
-        "failed to find the requested model version", pa::GENERIC_ERROR);
-  }
-
-  if (multiple_found) {
-    std::cerr << "WARNING: Multiple versions of model "
-              << model_identifier.first
-              << " are loaded in the triton server, and the version to use was "
-                 "unspecified. The stats for that model may be inaccurate."
-              << std::endl;
-  }
-
-  return cb::Error::Success;
-}
-
-// Only for unit-testing
-#ifndef DOCTEST_CONFIG_DISABLE
-cb::Error
-InferenceProfiler::SetTopLevelResponseCaching(
-    bool enable_top_level_response_caching)
-{
-  parser_ = std::make_shared<ModelParser>(cb::BackendKind::TRITON);
-  if (parser_ == nullptr) {
-    return cb::Error("Failed to initialize ModelParser");
-  }
-  parser_->SetTopLevelResponseCaching(enable_top_level_response_caching);
-  return cb::Error::Success;
-}
-#endif
-
-cb::Error
-InferenceProfiler::SummarizeServerStats(
-    const std::map<cb::ModelIdentifier, cb::ModelStatistics>& start_status,
-    const std::map<cb::ModelIdentifier, cb::ModelStatistics>& end_status,
-    ServerSideStats* server_stats)
-{
-  RETURN_IF_ERROR(SummarizeServerStats(
-      std::make_pair(parser_->ModelName(), parser_->ModelVersion()),
-      start_status, end_status, server_stats));
-  return cb::Error::Success;
-}
-
-cb::Error
-InferenceProfiler::SummarizeServerStats(
-    const cb::ModelIdentifier& model_identifier,
-    const std::map<cb::ModelIdentifier, cb::ModelStatistics>& start_status,
-    const std::map<cb::ModelIdentifier, cb::ModelStatistics>& end_status,
-    ServerSideStats* server_stats)
-{
-  RETURN_IF_ERROR(SummarizeServerStatsHelper(
-      model_identifier, start_status, end_status, server_stats));
-
-  // Summarize the composing models, if any.
- for (auto composing_model_identifier : - (*parser_->GetComposingModelMap())[model_identifier.first]) { - int64_t model_version; - RETURN_IF_ERROR(DetermineStatsModelVersion( - composing_model_identifier, start_status, end_status, &model_version)); - composing_model_identifier.second = std::to_string(model_version); - auto it = server_stats->composing_models_stat - .emplace(composing_model_identifier, ServerSideStats()) - .first; - RETURN_IF_ERROR(SummarizeServerStats( - composing_model_identifier, start_status, end_status, &(it->second))); - } - - return cb::Error::Success; -} - -cb::Error -InferenceProfiler::SummarizeServerStatsHelper( - const cb::ModelIdentifier& model_identifier, - const std::map& start_status, - const std::map& end_status, - ServerSideStats* server_stats) -{ - int64_t model_version; - RETURN_IF_ERROR(DetermineStatsModelVersion( - model_identifier, start_status, end_status, &model_version)); - - const std::pair this_id( - model_identifier.first, std::to_string(model_version)); - - const auto& end_itr = end_status.find(this_id); - if (end_itr == end_status.end()) { - // In case of ensemble models, if top level response caching is enabled, - // the composing models statistics are unavailable in case of a cache hit. - // This is due to the scheduler sends cache response and composing models do - // not get executed. It's a valid scenario and shouldn't throw error. - bool stats_not_found_and_invalid = - model_version == -1 && !parser_->TopLevelResponseCachingEnabled(); - if (stats_not_found_and_invalid) { - return cb::Error( - "missing statistics for requested model", pa::GENERIC_ERROR); - } else { - // Setting server stats 0 for composing model in case of ensemble request - // cache hit since the composing model will not be executed - server_stats->Reset(); - } - } else { - uint64_t start_infer_cnt = 0; - uint64_t start_exec_cnt = 0; - uint64_t start_cnt = 0; - uint64_t start_queue_cnt = 0; - uint64_t start_compute_input_cnt = 0; - uint64_t start_compute_infer_cnt = 0; - uint64_t start_compute_output_cnt = 0; - uint64_t start_cumm_time_ns = 0; - uint64_t start_queue_time_ns = 0; - uint64_t start_compute_input_time_ns = 0; - uint64_t start_compute_infer_time_ns = 0; - uint64_t start_compute_output_time_ns = 0; - uint64_t start_cache_hit_cnt = 0; - uint64_t start_cache_hit_time_ns = 0; - uint64_t start_cache_miss_cnt = 0; - uint64_t start_cache_miss_time_ns = 0; - - const auto& start_itr = start_status.find(this_id); - if (start_itr != start_status.end()) { - start_infer_cnt = start_itr->second.inference_count_; - start_exec_cnt = start_itr->second.execution_count_; - start_cnt = start_itr->second.success_count_; - start_queue_cnt = start_itr->second.queue_count_; - start_compute_input_cnt = start_itr->second.compute_input_count_; - start_compute_infer_cnt = start_itr->second.compute_infer_count_; - start_compute_output_cnt = start_itr->second.compute_output_count_; - start_cumm_time_ns = start_itr->second.cumm_time_ns_; - start_queue_time_ns = start_itr->second.queue_time_ns_; - start_compute_input_time_ns = start_itr->second.compute_input_time_ns_; - start_compute_infer_time_ns = start_itr->second.compute_infer_time_ns_; - start_compute_output_time_ns = start_itr->second.compute_output_time_ns_; - start_cache_hit_cnt = start_itr->second.cache_hit_count_; - start_cache_hit_time_ns = start_itr->second.cache_hit_time_ns_; - start_cache_miss_cnt = start_itr->second.cache_miss_count_; - start_cache_miss_time_ns = start_itr->second.cache_miss_time_ns_; - } - - 
server_stats->inference_count = - end_itr->second.inference_count_ - start_infer_cnt; - server_stats->execution_count = - end_itr->second.execution_count_ - start_exec_cnt; - server_stats->success_count = end_itr->second.success_count_ - start_cnt; - server_stats->queue_count = end_itr->second.queue_count_ - start_queue_cnt; - server_stats->compute_input_count = - end_itr->second.compute_input_count_ - start_compute_input_cnt; - server_stats->compute_infer_count = - end_itr->second.compute_infer_count_ - start_compute_infer_cnt; - server_stats->compute_output_count = - end_itr->second.compute_output_count_ - start_compute_output_cnt; - server_stats->cumm_time_ns = - end_itr->second.cumm_time_ns_ - start_cumm_time_ns; - server_stats->queue_time_ns = - end_itr->second.queue_time_ns_ - start_queue_time_ns; - server_stats->compute_input_time_ns = - end_itr->second.compute_input_time_ns_ - start_compute_input_time_ns; - server_stats->compute_infer_time_ns = - end_itr->second.compute_infer_time_ns_ - start_compute_infer_time_ns; - server_stats->compute_output_time_ns = - end_itr->second.compute_output_time_ns_ - start_compute_output_time_ns; - server_stats->cache_hit_count = - end_itr->second.cache_hit_count_ - start_cache_hit_cnt; - server_stats->cache_hit_time_ns = - end_itr->second.cache_hit_time_ns_ - start_cache_hit_time_ns; - server_stats->cache_miss_count = - end_itr->second.cache_miss_count_ - start_cache_miss_cnt; - server_stats->cache_miss_time_ns = - end_itr->second.cache_miss_time_ns_ - start_cache_miss_time_ns; - } - - return cb::Error::Success; -} - -void -InferenceProfiler::SummarizeOverhead( - const uint64_t window_duration_ns, const uint64_t idle_ns, - PerfStatus& summary) -{ - // The window start/stop is not instantaneous. It is possible that the PA - // overhead is smaller than the delay in the window start/stop process. 
Treat - // it as 0% overhead (100% idle) in that case - // - if (idle_ns > window_duration_ns) { - summary.overhead_pct = 0; - } else { - uint64_t overhead_ns = window_duration_ns - idle_ns; - double overhead_pct = double(overhead_ns) / window_duration_ns * 100; - summary.overhead_pct = overhead_pct; - } -} - -bool -InferenceProfiler::AllMPIRanksAreStable(bool current_rank_stability) -{ - int world_size{mpi_driver_->MPICommSizeWorld()}; - std::vector stabilities_per_rank{}; - stabilities_per_rank.resize(world_size, 0); - int my_rank{mpi_driver_->MPICommRankWorld()}; - stabilities_per_rank[my_rank] = static_cast(current_rank_stability); - - for (int rank{0}; rank < world_size; rank++) { - mpi_driver_->MPIBcastIntWorld(stabilities_per_rank.data() + rank, 1, rank); - } - - bool all_stable{true}; - for (int rank{0}; rank < world_size; rank++) { - if (stabilities_per_rank[rank] == 0) { - all_stable = false; - break; - } - } - - if (verbose_ && all_stable) { - std::cout << "All models on all MPI ranks are stable" << std::endl; - } - - return all_stable; -} - -cb::Error -InferenceProfiler::MergeMetrics( - const std::vector>& all_metrics, - Metrics& merged_metrics) -{ - // Maps from each metric collection mapping gpu uuid to gpu utilization - std::vector>> - gpu_utilization_per_gpu_maps{}; - - // Maps from each metric collection mapping gpu uuid to gpu power usage - std::vector>> - gpu_power_usage_per_gpu_maps{}; - - // Maps from each metric collection mapping gpu uuid to gpu memory used bytes - std::vector>> - gpu_memory_used_bytes_per_gpu_maps{}; - - // Maps from each metric collection mapping gpu uuid to gpu memory total bytes - std::vector>> - gpu_memory_total_bytes_per_gpu_maps{}; - - // Put all metric maps in vector so they're easier to aggregate - std::for_each( - all_metrics.begin(), all_metrics.end(), - [&gpu_utilization_per_gpu_maps, &gpu_power_usage_per_gpu_maps, - &gpu_memory_used_bytes_per_gpu_maps, - &gpu_memory_total_bytes_per_gpu_maps]( - const std::reference_wrapper m) { - gpu_utilization_per_gpu_maps.push_back(m.get().gpu_utilization_per_gpu); - gpu_power_usage_per_gpu_maps.push_back(m.get().gpu_power_usage_per_gpu); - gpu_memory_used_bytes_per_gpu_maps.push_back( - m.get().gpu_memory_used_bytes_per_gpu); - gpu_memory_total_bytes_per_gpu_maps.push_back( - m.get().gpu_memory_total_bytes_per_gpu); - }); - - GetMetricAveragePerGPU( - gpu_utilization_per_gpu_maps, merged_metrics.gpu_utilization_per_gpu); - GetMetricAveragePerGPU( - gpu_power_usage_per_gpu_maps, merged_metrics.gpu_power_usage_per_gpu); - GetMetricMaxPerGPU( - gpu_memory_used_bytes_per_gpu_maps, - merged_metrics.gpu_memory_used_bytes_per_gpu); - GetMetricFirstPerGPU( - gpu_memory_total_bytes_per_gpu_maps, - merged_metrics.gpu_memory_total_bytes_per_gpu); - - return cb::Error::Success; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/inference_profiler.h b/src/c++/perf_analyzer/inference_profiler.h deleted file mode 100644 index a73651319..000000000 --- a/src/c++/perf_analyzer/inference_profiler.h +++ /dev/null @@ -1,818 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "concurrency_manager.h" -#include "constants.h" -#include "custom_load_manager.h" -#include "metrics.h" -#include "metrics_manager.h" -#include "model_parser.h" -#include "mpi_utils.h" -#include "periodic_concurrency_manager.h" -#include "profile_data_collector.h" -#include "request_rate_manager.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockInferenceProfiler; -class TestInferenceProfiler; -class ModelParser; -#endif - -/// Constant parameters that determine the whether stopping criteria has met -/// for the current phase of testing -struct LoadParams { - // The number of measurements to account for during calculation of load - // status - uint32_t stability_window; - // The +/- range to account for while assessing load status - double stability_threshold; -}; - -/// Data structure to keep track of real-time load status and determine whether -/// stopping criteria has met for the current phase of testing. -struct LoadStatus { - // Stores the observations of infer_per_sec and latencies in a vector - std::vector infer_per_sec; - std::vector latencies; - // Records the average inference per second within the stability window - double avg_ips = 0; - // Stores the average latency within the stability window - uint64_t avg_latency = 0; -}; - -/// Configuration for the Measure function -struct MeasureConfig { - uint64_t measurement_window{0}; - bool is_count_based{false}; - bool clamp_window{false}; -}; - -// Holds the total of the timiming components of composing models of an -// ensemble. 
-struct EnsembleDurations {
-  EnsembleDurations()
-      : total_queue_time_avg_us(0), total_compute_time_avg_us(0),
-        total_cache_hit_time_avg_us(0), total_cache_miss_time_avg_us(0),
-        total_combined_cache_compute_time_avg_us(0)
-  {
-  }
-  uint64_t total_queue_time_avg_us;
-  uint64_t total_compute_time_avg_us;
-  // Time spent on cache lookups/copies for cache hits
-  uint64_t total_cache_hit_time_avg_us;
-  // Time spent on cache lookups/copies/insertions for cache misses
-  uint64_t total_cache_miss_time_avg_us;
-
-  // Combined average of cache and compute times
-  uint64_t total_combined_cache_compute_time_avg_us;
-};
-
-/// Holds the server-side inference statistics of the target model and its
-/// composing models
-struct ServerSideStats {
-  uint64_t inference_count;
-  uint64_t execution_count;
-  uint64_t cache_hit_count;
-  uint64_t cache_miss_count;
-  uint64_t success_count;
-  uint64_t queue_count;
-  uint64_t compute_input_count;
-  uint64_t compute_infer_count;
-  uint64_t compute_output_count;
-  uint64_t cumm_time_ns;
-  uint64_t queue_time_ns;
-  uint64_t compute_input_time_ns;
-  uint64_t compute_infer_time_ns;
-  uint64_t compute_output_time_ns;
-  // Time spent on cache lookups/copies for cache hits
-  uint64_t cache_hit_time_ns;
-  // Time spent on cache lookups/copies/insertions for cache misses
-  uint64_t cache_miss_time_ns;
-
-  std::map<cb::ModelIdentifier, ServerSideStats> composing_models_stat;
-  // This function sets composing model server stats to 0 in case of a cache hit
-  // when the top-level response cache is enabled, since composing models are
-  // not executed and do not have any stats
-  void Reset()
-  {
-    inference_count = 0;
-    execution_count = 0;
-    success_count = 0;
-    queue_count = 0;
-    compute_input_count = 0;
-    compute_infer_count = 0;
-    compute_output_count = 0;
-    cumm_time_ns = 0;
-    queue_time_ns = 0;
-    compute_input_time_ns = 0;
-    compute_infer_time_ns = 0;
-    compute_output_time_ns = 0;
-    cache_hit_count = 0;
-    cache_hit_time_ns = 0;
-    cache_miss_count = 0;
-    cache_miss_time_ns = 0;
-  }
-};
-
-/// Holds the statistics recorded at the client side.
-struct ClientSideStats {
-  // Request count and elapsed time measured by client
-  uint64_t request_count;
-  // Only record sequences that finish within the measurement window
-  uint64_t sequence_count;
-  // The number of requests that missed their schedule
-  uint64_t delayed_request_count;
-  // The number of responses
-  uint64_t response_count;
-  uint64_t duration_ns;
-  uint64_t avg_latency_ns;
-  // An ordered map of percentiles to be reported (<percentile, latency> pairs)
-  std::map<size_t, uint64_t> percentile_latency_ns;
-  // List of all the valid latencies.
-  std::vector<uint64_t> latencies;
-  // Using usec to avoid square of large number (large in nsec)
-  uint64_t std_us;
-  uint64_t avg_request_time_ns;
-  uint64_t avg_send_time_ns;
-  uint64_t avg_receive_time_ns;
-  // Per sec stat
-  double infer_per_sec;
-  double responses_per_sec;
-  double sequence_per_sec;
-
-  // Completed request count reported by the client library
-  uint64_t completed_count;
-};
-
-/// The entire statistics record.
-struct PerfStatus {
-  uint32_t concurrency;
-  double request_rate;
-  size_t batch_size;
-  ServerSideStats server_stats;
-  ClientSideStats client_stats;
-  std::vector<Metrics> metrics{};
-  double overhead_pct;
-  bool on_sequence_model;
-
-  // placeholder for the latency value that is used for conditional checking
-  uint64_t stabilizing_latency_ns;
-  // Metric for requests sent per second
-  double send_request_rate{0.0};
-};
-
-cb::Error ReportPrometheusMetrics(const Metrics& metrics);
-
-//==============================================================================
-/// An InferenceProfiler is a helper class that measures and summarizes the
-/// inference statistics under different concurrency levels.
-///
-/// The profiler can adjust the number of concurrent requests by informing the
-/// concurrency manager. After the adjustment, the profiler actively collects
-/// statistics from both the concurrency manager and the inference server
-/// until the measurement is stable. Once stable, the profiler updates the
-/// 'status_summary' based on the most recent measurement.
-///
-/// The measurement procedure:
-/// 1. The profiler gets start status from the server and records the start
-/// time.
-/// 2. After given time interval, the profiler gets end status from the server
-/// and records the end time.
-/// 3. The profiler obtains the request records recorded by concurrency manager,
-/// and uses the request records that are recorded between start time and end
-/// time to measure client side status and update status_summary.
-///
-class InferenceProfiler {
- public:
-  /// Create a profiler that collects and summarizes inference statistics.
-  /// \param verbose Whether to print verbose logging.
-  /// \param stability_threshold The range within which the measurement is
-  /// considered stable, i.e. within (1 +/- stability_threshold) * average
-  /// value of the last 3 measurements. The criteria are "infer per second"
-  /// and "average latency", or "infer per second" and "percentile latency"
-  /// if a valid percentile is set (see 'percentile' below).
-  /// \param measurement_window_ms The duration of each measurement in msec.
-  /// \param max_trials The maximum number of attempts to obtain
-  /// stable measurement.
-  /// \param percentile The percentile in terms of latency to be reported.
-  /// If it is a valid percentile value, the percentile latency will be
-  /// reported and used as the stability criterion instead of average latency.
-  /// If it is -1, average latency will be reported and used as the stability
-  /// criterion.
-  /// \param latency_threshold_ms The threshold on the latency measurements in
-  /// milliseconds.
-  /// \param parser The ModelParser object which holds all the details about
-  /// the model.
-  /// \param profile_backend The ClientBackend object used to communicate
-  /// with the server by profiler.
-  /// \param manager The LoadManager object that will produce load on the
-  /// server.
-  /// \param profiler Returns a new InferenceProfiler object.
-  /// \param measurement_request_count The number of requests to capture when
-  /// using "count_windows" mode.
-  /// \param measurement_mode The measurement mode to use for windows.
-  /// \param mpi_driver The driver class for MPI operations.
-  /// \param metrics_interval_ms The interval at which the server-side metrics
-  /// are collected.
-  /// \param should_collect_metrics Whether server-side inference server metrics
-  /// should be collected.
-  /// \param overhead_pct_threshold User set threshold above which the PA
-  /// overhead is too significant to provide usable results.
-  /// \param collector Collector for the profile data from experiments
-  /// \param should_collect_profile_data Whether to collect profile data.
-  /// \return cb::Error object indicating success or failure.
-  static cb::Error Create(
-      const bool verbose, const double stability_threshold,
-      const uint64_t measurement_window_ms, const size_t max_trials,
-      const int64_t percentile, const uint64_t latency_threshold_ms,
-      const cb::ProtocolType protocol, std::shared_ptr<ModelParser>& parser,
-      std::shared_ptr<cb::ClientBackend> profile_backend,
-      std::unique_ptr<LoadManager> manager,
-      std::unique_ptr<InferenceProfiler>* profiler,
-      uint64_t measurement_request_count, MeasurementMode measurement_mode,
-      std::shared_ptr<MPIDriver> mpi_driver, const uint64_t metrics_interval_ms,
-      const bool should_collect_metrics, const double overhead_pct_threshold,
-      const bool async_mode,
-      const std::shared_ptr<ProfileDataCollector> collector,
-      const bool should_collect_profile_data);
-
-  /// Performs the profiling on the given range with the given search algorithm.
-  /// For profiling using request rate, invoke the template with double;
-  /// otherwise invoke with size_t for concurrency search.
-  /// \param start The starting point of the search range.
-  /// \param end The ending point of the search range.
-  /// \param step The step size to move along the search range in linear search
-  /// or the precision in binary search.
-  /// \param search_mode The search algorithm to be applied.
-  /// \param request_count The number of requests to generate in each
-  /// experiment. If 0, then there is no limit, and it will generate until
-  /// stable.
-  /// \param perf_statuses Returns the trace of the measurement along the
-  /// search path.
-  /// \return cb::Error object indicating success or failure.
-  template <typename T>
-  cb::Error Profile(
-      const T start, const T end, const T step, const SearchMode search_mode,
-      const size_t request_count, std::vector<PerfStatus>& perf_statuses)
-  {
-    cb::Error err;
-    bool meets_threshold, is_stable;
-    if (search_mode == SearchMode::NONE) {
-      err = Profile(request_count, perf_statuses, meets_threshold, is_stable);
-      if (!err.IsOk()) {
-        return err;
-      }
-    } else if (search_mode == SearchMode::LINEAR) {
-      T current_value = start;
-      do {
-        err = Profile(
-            current_value, request_count, perf_statuses, meets_threshold,
-            is_stable);
-        if (!err.IsOk()) {
-          return err;
-        }
-        current_value += step;
-      } while (((current_value <= end) || (end == static_cast<T>(NO_LIMIT))) &&
-               (meets_threshold));
-      // If there was only one concurrency we swept over and it did not meet the
-      // stability threshold, we should return an error.
- if (current_value == (start + step) && is_stable == false) { - return cb::Error( - "Failed to obtain stable measurement.", pa::STABILITY_ERROR); - } - } else { - err = Profile( - start, request_count, perf_statuses, meets_threshold, is_stable); - if (!err.IsOk() || (!meets_threshold)) { - return err; - } - err = Profile( - end, request_count, perf_statuses, meets_threshold, is_stable); - if (!err.IsOk() || (meets_threshold)) { - return err; - } - - T this_start = start; - T this_end = end; - while ((this_end - this_start) > step) { - T current_value = (this_end + this_start) / 2; - err = Profile( - current_value, request_count, perf_statuses, meets_threshold, - is_stable); - if (!err.IsOk()) { - return err; - } - if (meets_threshold) { - this_start = current_value; - } else { - this_end = current_value; - } - } - } - return cb::Error::Success; - } - - cb::Error ProfilePeriodicConcurrencyMode() - { - auto& manager{dynamic_cast(*manager_)}; - std::vector request_records{manager.RunExperiment()}; - // FIXME - Refactor collector class to not need ID or window in the case of - // periodic concurrency mode - InferenceLoadMode id{1, 0.0}; - collector_->AddWindow(id, 0, UINT64_MAX); - collector_->AddData(id, std::move(request_records)); - return cb::Error::Success; - } - - bool IncludeServerStats() { return include_server_stats_; } - - private: - InferenceProfiler( - const bool verbose, const double stability_threshold, - const int32_t measurement_window_ms, const size_t max_trials, - const bool extra_percentile, const size_t percentile, - const uint64_t latency_threshold_ms, const cb::ProtocolType protocol, - std::shared_ptr& parser, - std::shared_ptr profile_backend, - std::unique_ptr manager, uint64_t measurement_request_count, - MeasurementMode measurement_mode, std::shared_ptr mpi_driver, - const uint64_t metrics_interval_ms, const bool should_collect_metrics, - const double overhead_pct_threshold, const bool async_mode, - const std::shared_ptr collector, - const bool should_collect_profile_data); - - /// Actively measure throughput in every 'measurement_window' msec until the - /// throughput is stable. Once the throughput is stable, it adds the - /// observations on summary trace and returns whether the setting met the - /// threshold. NOTE: the requests are being sent regardless of the - /// measurement, so the data returned by the server (see struct - /// PerforamnceStatusStruct) will include more requests than what the client - /// measures (we can't get the exact server status right before the first - /// request and right after the last request in the measurement window). - /// \param concurrent_request_count The concurrency level for the measurement. - /// \param perf_statuses Appends the measurements summary at the end of this - /// list. - /// \param request_count The number of requests to generate when profiling. If - /// 0, then there is no limit, and it will generate until stable. - /// \param meets_threshold Returns whether the setting meets the - /// threshold. - /// \param is_stable Returns whether the measurement is stable. - /// \return cb::Error object indicating success or failure. - cb::Error Profile( - const size_t concurrent_request_count, const size_t request_count, - std::vector& perf_statuses, bool& meets_threshold, - bool& is_stable); - - /// Similar to above function, but instead of setting the concurrency, it - /// sets the specified request rate for measurements. - /// \param request_rate The request rate for inferences. 
- /// \param request_count The number of requests to generate when profiling. If - /// 0, then there is no limit, and it will generate until stable. - /// \param perf_statuses Appends the measurements summary at the end of this - /// list. - /// \param meets_threshold Returns whether the setting meets the - /// threshold. - /// \param is_stable Returns whether the measurement is stable. - /// \return cb::Error object indicating success or failure. - cb::Error Profile( - const double request_rate, const size_t request_count, - std::vector& perf_statuses, bool& meets_threshold, - bool& is_stable); - - /// Measures throughput and latencies for custom load without controlling - /// request rate nor concurrency. Requires load manager to be loaded with - /// a file specifying the time intervals. - /// \param request_count The number of requests to generate when profiling. If - /// 0, then there is no limit, and it will generate until stable. - /// \param perf_statuses Appends the measurements summary at the end of this - /// list. - /// \param meets_threshold Returns whether the measurement met the - /// threshold. - /// \param is_stable Returns whether the measurement is stable. - /// \return cb::Error object indicating success - /// or failure. - cb::Error Profile( - const size_t request_count, std::vector& perf_statuses, - bool& meets_threshold, bool& is_stable); - - /// A helper function for profiling functions. - /// \param status_summary Returns the summary of the measurement. - /// \param request_count The number of requests to generate when profiling. If - /// 0, then there is no limit, and it will generate until stable. - /// \param is_stable Returns whether the measurement stabilized or not. - /// \return cb::Error object indicating success or failure. - cb::Error ProfileHelper( - PerfStatus& status_summary, size_t request_count, bool* is_stable); - - /// A helper function to determine if profiling is stable - /// \param load_status Stores the observations of infer_per_sec and latencies - /// \param check_latency Whether to check latency for stability - /// \return Returns if the threshold and latencies are stable. - bool DetermineStability(LoadStatus& load_status, bool check_latency = true); - - /// Check if latency at index idx is within the latency threshold - /// \param idx index in latency vector - /// \param load_status Stores the observations of infer_per_sec and latencies - /// \return Returns whether the latencies are below the max threshold - bool CheckWithinThreshold(size_t idx, LoadStatus& load_status); - - /// A helper function to determine if profiling is done - /// \param load_status Stores the observations of infer_per_sec and latencies - /// \param is_stable Returns whether the measurement stabilized or not. - /// \return Returns if we should break out of the infinite stability check - /// loop. 
- bool IsDoneProfiling(LoadStatus& load_status, bool* is_stable); - - /// Check if observed inferences and latencies are within threshold - /// for a single window starting at idx - /// \param idx index in latency vector - /// \param load_status Stores the observations of infer_per_sec and latencies - /// \param check_latency Whether to check latency for stability - /// \return Returns whether inference and latency are stable - bool CheckWindowForStability( - size_t idx, LoadStatus& load_status, bool check_latency); - - /// Check if observed inferences are within threshold - /// for a single window starting at idx - /// \param idx index in latency vector - /// \param load_status Stores the observations of infer_per_sec and latencies - /// \return Returns whether inference is stable - bool IsInferWindowStable(size_t idx, LoadStatus& load_status); - - /// Check if observed latencies are within threshold - /// for a single window starting at idx - /// \param idx index in latency vector - /// \param load_status Stores the observations of infer_per_sec and latencies - /// \return Returns whether latency is stable - bool IsLatencyWindowStable(size_t idx, LoadStatus& load_status); - - /// Helper function to perform measurement. - /// \param status_summary The summary of this measurement. - /// \param config The configuration for measurement. - /// \return cb::Error object indicating success or failure. - cb::Error Measure(PerfStatus& status_summary, MeasureConfig config); - - /// Gets the server side statistics - /// \param model_status Returns the status of the models provided by - /// the server. If the model being profiled is non-ensemble model, - /// only its status will be returned. Otherwise, the status of the composing - /// models will also be returned. - /// \return cb::Error object indicating success or failure. - cb::Error GetServerSideStatus( - std::map* model_status); - - /// Summarize the measurement with the provided statistics. - /// \param start_status The model status at the start of the measurement. - /// \param end_status The model status at the end of the measurement. - /// \param start_stat The accumulated context status at the start. - /// \param end_stat The accumulated context status at the end. - /// \param summary Returns the summary of the measurement. - /// \param window_start_ns The window start timestamp in nanoseconds. - /// \param window_end_ns The window end timestamp in nanoseconds. - /// \param clamp_window If true, the actual window range is reduced to the - /// start of the first request to the final response. - /// \return cb::Error object indicating success or failure. - cb::Error Summarize( - const std::map& start_status, - const std::map& end_status, - const cb::InferStat& start_stat, const cb::InferStat& end_stat, - PerfStatus& summary, uint64_t window_start_ns, uint64_t window_end_ns, - bool clamp_window); - - /// \param valid_range The start and end timestamp of the measurement window. - /// \param valid_sequence_count Returns the number of completed sequences - /// during the measurement. A sequence is a set of correlated requests sent to - /// sequence model. - /// \param latencies Returns the vector of request latencies where the - /// requests are completed within the measurement window. 
- /// \param response_count Returns the number of responses - /// \param valid_requests Returns a vector of valid request records - virtual void ValidLatencyMeasurement( - const std::pair& valid_range, - size_t& valid_sequence_count, size_t& delayed_request_count, - std::vector* latencies, size_t& response_count, - std::vector& valid_requests); - - /// Clamp a window around a set of requests, from the earliest start time to - /// the latest response - /// \param requests A vector of requests to clamp the window around. - /// \return std::pair object containing of the window. - std::pair ClampWindow( - std::vector& requests); - - /// Add the data from the request records to the Raw Data Collector - /// \param perf_status PerfStatus of the current measurement - /// \param window_start_ns The window start timestamp in nanoseconds. - /// \param window_end_ns The window end timestamp in nanoseconds. - /// \param request_records The request records to collect. - void CollectData( - PerfStatus& perf_status, uint64_t window_start_ns, uint64_t window_end_ns, - std::vector&& request_records); - - /// \param latencies The vector of request latencies collected. - /// \param summary Returns the summary that the latency related fields are - /// set. - /// \return cb::Error object indicating success or failure. - virtual cb::Error SummarizeLatency( - const std::vector& latencies, PerfStatus& summary); - - /// \param latencies The vector of request latencies collected. - /// \return std::tuple object containing: - /// * mean of latencies in nanoseconds - /// * sample standard deviation of latencies in microseconds - std::tuple GetMeanAndStdDev( - const std::vector& latencies); - - /// \param start_stat The accumulated client statistics at the start. - /// \param end_stat The accumulated client statistics at the end. - /// \param duration_ns The duration of the measurement in nsec. - /// \param valid_request_count The number of completed requests recorded. - /// \param valid_sequence_count The number of completed sequences recorded. - /// \param delayed_request_count The number of requests that missed their - /// schedule. - /// \param response_count The number of responses. - /// \param summary Returns the summary that the fields recorded by - /// client are set. - /// \return cb::Error object indicating success or failure. - virtual cb::Error SummarizeClientStat( - const cb::InferStat& start_stat, const cb::InferStat& end_stat, - const uint64_t duration_ns, const size_t valid_request_count, - const size_t delayed_request_count, const size_t valid_sequence_count, - const size_t response_count, PerfStatus& summary); - - /// Adds the send request rate metric to the summary object. - /// \param window_duration_s The duration of the window in seconds. - /// \param num_sent_requests The number of requests sent during the last - /// window. - /// \param summary The summary object to be updated with the send request rate - /// metric. 
- void SummarizeSendRequestRate( - const double window_duration_s, const size_t num_sent_requests, - PerfStatus& summary); - - /// Given a model_identifier to gather stats for, and a map of ALL stats, - /// determine which version of the model should be gathered - /// \param model_identifier A pair of model_name and model_version to identify - /// a specific model - /// \param start_stats The stats for all models at the start of the - /// measurement - /// \param end_stats The stats for all models at the end of the measurement - /// \param model_version The determined model version - - cb::Error DetermineStatsModelVersion( - const cb::ModelIdentifier& model_identifier, - const std::map& start_stats, - const std::map& end_stats, - int64_t* model_version); - -#ifndef DOCTEST_CONFIG_DISABLE - cb::Error SetTopLevelResponseCaching(bool enable_top_level_request_caching); -#endif - - /// \param start_status The model status at the start of the measurement. - /// \param end_status The model status at the end of the measurement. - /// \param server_stats Returns the summary that the fields recorded by server - /// are set. - /// \return cb::Error object indicating success or failure. - cb::Error SummarizeServerStats( - const std::map& start_status, - const std::map& end_status, - ServerSideStats* server_stats); - - /// \param model_identifier A pair of model_name and model_version to identify - /// a specific model. - /// \param start_status The model status at the start of the measurement. - /// \param end_status The model status at the end of the measurement. - /// \param server_stats Returns the summary that the fields recorded by server - /// are set. - /// \return cb::Error object indicating success or failure. - cb::Error SummarizeServerStats( - const cb::ModelIdentifier& model_identifier, - const std::map& start_status, - const std::map& end_status, - ServerSideStats* server_stats); - - /// \param model_identifier A pair of model_name and model_version to identify - /// a specific model. - /// \param start_status The model status at the start of the measurement. - /// \param end_status The model status at the end of the measurement. - /// \param server_stats Returns the summary that the fields recorded by server - /// are set. - /// \return cb::Error object indicating success or failure. - cb::Error SummarizeServerStatsHelper( - const cb::ModelIdentifier& model_identifier, - const std::map& start_status, - const std::map& end_status, - ServerSideStats* server_stats); - - /// Calculate the overhead and put the results into the summary - /// - /// \param window_duration_ns The duration of the window - /// \param idle_ns The average worker idle time during the window - /// \param summary The summary object to be updated with overhead stats - /// - void SummarizeOverhead( - const uint64_t window_duration_ns, const uint64_t idle_ns, - PerfStatus& summary); - - /// Returns true if all MPI ranks (models) are stable. Should only be run if - /// and only if IsMPIRun() returns true. - /// \param current_rank_stability The stability of the current rank. - /// \return True if all MPI ranks are stable. - bool AllMPIRanksAreStable(bool current_rank_stability); - - /// Merge individual perf status reports into a single perf status. This - /// function is used to merge the results from multiple Measure runs into a - /// single report. - /// \param perf_status List of perf status reports to be merged. - /// \param summary_status Final merged summary status. 
- /// \return cb::Error object indicating success or failure. - virtual cb::Error MergePerfStatusReports( - std::deque& perf_status, PerfStatus& summary_status); - - /// Merge individual server side statistics into a single server side report. - /// \param server_side_stats List of server side statistics reports to be - /// merged. - /// \param server_side_summary Final merged summary status. - /// \return cb::Error object indicating success or failure. - virtual cb::Error MergeServerSideStats( - std::vector& server_side_stats, - ServerSideStats& server_side_summary); - - /// \param all_metrics Individual metrics from all intervals from stable - /// passes. - /// \param merged_metrics Output merged metrics from all intervals from stable - /// passes. - /// \return cb::Error object indicating success or failure. - cb::Error MergeMetrics( - const std::vector>& all_metrics, - Metrics& merged_metrics); - - template - void GetMetricAveragePerGPU( - const std::vector>>& - input_metric_maps, - std::map& output_metric_map) - { - std::map metric_count_per_gpu{}; - - for (const auto& input_metric_map : input_metric_maps) { - for (const auto& input_metric : input_metric_map.get()) { - const auto& gpu_uuid{input_metric.first}; - const auto& metric{input_metric.second}; - - if (output_metric_map.find(gpu_uuid) == output_metric_map.end()) { - output_metric_map[gpu_uuid] = 0; - metric_count_per_gpu[gpu_uuid] = 0; - } - - output_metric_map[gpu_uuid] += metric; - metric_count_per_gpu[gpu_uuid]++; - } - } - - for (auto& output_metric : output_metric_map) { - const auto& gpu_uuid{output_metric.first}; - auto& metric{output_metric.second}; - const auto& metric_count{metric_count_per_gpu[gpu_uuid]}; - if (metric_count > 0) { - metric /= metric_count; - } - } - } - - template - void GetMetricMaxPerGPU( - const std::vector>>& - input_metric_maps, - std::map& output_metric_map) - { - for (const auto& input_metric_map : input_metric_maps) { - for (const auto& input_metric : input_metric_map.get()) { - const auto& gpu_uuid{input_metric.first}; - const auto& metric{input_metric.second}; - - if (output_metric_map.find(gpu_uuid) == output_metric_map.end()) { - output_metric_map[gpu_uuid] = 0; - } - - output_metric_map[gpu_uuid] = - std::max(output_metric_map[gpu_uuid], metric); - } - } - } - - template - void GetMetricFirstPerGPU( - const std::vector>>& - input_metric_maps, - std::map& output_metric_map) - { - for (const auto& input_metric_map : input_metric_maps) { - for (const auto& input_metric : input_metric_map.get()) { - const auto& gpu_uuid{input_metric.first}; - const auto& metric{input_metric.second}; - - if (output_metric_map.find(gpu_uuid) == output_metric_map.end()) { - output_metric_map[gpu_uuid] = metric; - } - } - } - } - - bool verbose_; - uint64_t measurement_window_ms_; - uint64_t measurement_request_count_; - MeasurementMode measurement_mode_; - size_t max_trials_; - bool extra_percentile_; - size_t percentile_; - uint64_t latency_threshold_ms_; - - cb::ProtocolType protocol_; - std::string model_name_; - int64_t model_version_; - - std::shared_ptr parser_; - std::shared_ptr profile_backend_; - std::unique_ptr manager_; - std::shared_ptr collector_; - LoadParams load_parameters_; - - bool include_lib_stats_; - bool include_server_stats_; - std::shared_ptr mpi_driver_; - - /// The request records of the requests completed during all measurements - std::vector all_request_records_; - - /// The end time of the previous measurement window - uint64_t previous_window_end_ns_; - - /// Server side 
statistics from the previous measurement window - std::map prev_server_side_stats_; - - /// Client side statistics from the previous measurement window - cb::InferStat prev_client_side_stats_; - - /// Metrics manager that collects server-side metrics periodically - std::shared_ptr metrics_manager_{nullptr}; - - /// Whether server-side inference server metrics should be collected. - bool should_collect_metrics_{false}; - - /// User set threshold above which the PA overhead is too significant to - /// provide usable results. - const double overhead_pct_threshold_{0.0}; - - // Whether to collect profile data. - bool should_collect_profile_data_{false}; - - // Whether the client is operating in async mode. - const bool async_mode_{false}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockInferenceProfiler; - friend TestInferenceProfiler; - friend ModelParser; - - public: - InferenceProfiler() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/ischeduler.h b/src/c++/perf_analyzer/ischeduler.h deleted file mode 100644 index a854b64b4..000000000 --- a/src/c++/perf_analyzer/ischeduler.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include "rate_schedule.h" - -namespace triton { namespace perfanalyzer { - -/// Interface for worker threads that use a schedule -/// -class IScheduler { - public: - /// Provides the schedule that should be followed - /// - virtual void SetSchedule(RateSchedulePtr_t schedule) = 0; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/iworker.h b/src/c++/perf_analyzer/iworker.h deleted file mode 100644 index 3a72f4c10..000000000 --- a/src/c++/perf_analyzer/iworker.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -namespace triton { namespace perfanalyzer { - -/// Interface for worker threads that generate inference requests -/// -class IWorker { - public: - virtual void Infer() = 0; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/load_manager.cc b/src/c++/perf_analyzer/load_manager.cc deleted file mode 100644 index 1f648a7f4..000000000 --- a/src/c++/perf_analyzer/load_manager.cc +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
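The per-GPU merge helpers removed a little earlier in this hunk (GetMetricAveragePerGPU, GetMetricMaxPerGPU, GetMetricFirstPerGPU) all reduce a list of GPU-UUID-keyed metric maps into a single map. A self-contained sketch of the averaging case, with the template and std::reference_wrapper plumbing stripped down to plain maps of doubles purely for illustration (not code from the repository):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Same reduction as GetMetricAveragePerGPU, minus the template plumbing:
// sum the samples per GPU UUID, then divide by how many samples mentioned
// that GPU, so GPUs that appear in only some samples are still averaged
// over their own count.
std::map<std::string, double>
AveragePerGpu(const std::vector<std::map<std::string, double>>& samples)
{
  std::map<std::string, double> sums;
  std::map<std::string, size_t> counts;
  for (const auto& sample : samples) {
    for (const auto& [gpu_uuid, value] : sample) {
      sums[gpu_uuid] += value;
      counts[gpu_uuid]++;
    }
  }
  for (auto& [gpu_uuid, sum] : sums) {
    sum /= counts[gpu_uuid];
  }
  return sums;
}

int main()
{
  // GPU-0 appears in both samples, GPU-1 only in the second.
  auto avg = AveragePerGpu(
      {{{"GPU-0", 40.0}}, {{"GPU-0", 60.0}, {"GPU-1", 10.0}}});
  std::cout << avg["GPU-0"] << " " << avg["GPU-1"] << "\n";  // prints: 50 10
}

The max and first variants follow the same shape, replacing the sum-and-divide step with std::max or keep-first-seen, respectively.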
- -#include "load_manager.h" - -#include - -#include "client_backend/client_backend.h" -#include "infer_data_manager_factory.h" - -namespace triton { namespace perfanalyzer { - - -cb::Error -LoadManager::CheckHealth() -{ - // Check thread status to make sure that the load setting is - // consistent to the one being reported - // If some thread return early, main thread will return and - // the worker thread's error message will be reported - // when derived class destructor gets called. - for (auto& thread_stat : threads_stat_) { - if (!thread_stat->status_.IsOk()) { - return cb::Error( - "Failed to maintain requested inference load." - " Worker thread(s) failed to generate concurrent requests.", - pa::GENERIC_ERROR); - } - if (!thread_stat->cb_status_.IsOk()) { - return cb::Error( - "Failed to retrieve results from inference request.", - pa::GENERIC_ERROR); - } - } - return cb::Error::Success; -} - -cb::Error -LoadManager::SwapRequestRecords(std::vector& new_request_records) -{ - std::vector total_request_records; - // Gather request records with proper locking from all the worker threads - for (auto& thread_stat : threads_stat_) { - std::lock_guard lock(thread_stat->mu_); - total_request_records.insert( - total_request_records.end(), thread_stat->request_records_.begin(), - thread_stat->request_records_.end()); - thread_stat->request_records_.clear(); - } - // Swap the results - total_request_records.swap(new_request_records); - return cb::Error::Success; -} - -uint64_t -LoadManager::CountCollectedRequests() -{ - uint64_t num_of_requests = 0; - for (auto& thread_stat : threads_stat_) { - std::lock_guard lock(thread_stat->mu_); - num_of_requests += thread_stat->request_records_.size(); - } - return num_of_requests; -} - -cb::Error -LoadManager::GetAccumulatedClientStat(cb::InferStat* contexts_stat) -{ - contexts_stat->completed_request_count = 0; - contexts_stat->cumulative_receive_time_ns = 0; - contexts_stat->cumulative_send_time_ns = 0; - contexts_stat->cumulative_total_request_time_ns = 0; - - for (auto& thread_stat : threads_stat_) { - std::lock_guard lock(thread_stat->mu_); - for (auto& context_stat : thread_stat->contexts_stat_) { - contexts_stat->completed_request_count += - context_stat.completed_request_count; - contexts_stat->cumulative_total_request_time_ns += - context_stat.cumulative_total_request_time_ns; - contexts_stat->cumulative_send_time_ns += - context_stat.cumulative_send_time_ns; - contexts_stat->cumulative_receive_time_ns += - context_stat.cumulative_receive_time_ns; - } - } - return cb::Error::Success; -} - -uint64_t -LoadManager::GetIdleTime() -{ - uint64_t total{0}; - size_t num_active_threads = 0; - for (auto& thread_stat : threads_stat_) { - std::lock_guard lock(thread_stat->mu_); - uint64_t idle_time = thread_stat->idle_timer.GetIdleTime(); - if (idle_time) { - total += idle_time; - num_active_threads++; - } - } - - // TODO REFACTOR TMA-1043 InferDataManager should have an API to get - // num_active_threads. 
This method of determining active threads isn't fully - // accurate - if (num_active_threads) { - total /= num_active_threads; - } - - return total; -} - -void -LoadManager::ResetIdleTime() -{ - for (auto& thread_stat : threads_stat_) { - std::lock_guard lock(thread_stat->mu_); - thread_stat->idle_timer.Reset(); - } -} - -const size_t -LoadManager::GetAndResetNumSentRequests() -{ - size_t num_sent_requests{0}; - - for (auto& thread_stat : threads_stat_) { - num_sent_requests += thread_stat->num_sent_requests_; - thread_stat->num_sent_requests_ = 0; - } - - return num_sent_requests; -} - -LoadManager::LoadManager( - const bool async, const bool streaming, const int32_t batch_size, - const size_t max_threads, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters) - : async_(async), streaming_(streaming), batch_size_(batch_size), - max_threads_(max_threads), parser_(parser), factory_(factory), - using_json_data_(false) -{ - on_sequence_model_ = - ((parser_->SchedulerType() == ModelParser::SEQUENCE) || - (parser_->SchedulerType() == ModelParser::ENSEMBLE_SEQUENCE)); - - data_loader_.reset(new DataLoader(batch_size_)); - - infer_data_manager_ = InferDataManagerFactory::CreateInferDataManager( - max_threads, batch_size, shared_memory_type, output_shm_size, - request_parameters, parser, factory, data_loader_); -} - -void -LoadManager::InitManager( - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation) -{ - // Note, this is already caught by the CLI, but adding it here for extra - // protection - if (on_sequence_model_ && batch_size_ > 1) { - throw PerfAnalyzerException( - "error: sequence models do not support batching", GENERIC_ERROR); - } - - auto status = - InitManagerInputs(string_length, string_data, zero_input, user_data); - THROW_IF_ERROR(status, "Failed to init manager inputs"); - - THROW_IF_ERROR( - infer_data_manager_->Init(), "Unable to init infer data manager"); - - sequence_manager_ = MakeSequenceManager( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data_, - data_loader_); - - InitManagerFinalize(); -} - -cb::Error -LoadManager::InitManagerInputs( - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data) -{ - RETURN_IF_ERROR(factory_->CreateClientBackend(&backend_)); - - // Read provided data - if (!user_data.empty()) { - if (IsDirectory(user_data[0])) { - RETURN_IF_ERROR(data_loader_->ValidateIOExistsInModel( - parser_->Inputs(), parser_->Outputs(), user_data[0])); - RETURN_IF_ERROR(data_loader_->ReadDataFromDir( - parser_->Inputs(), parser_->Outputs(), user_data[0])); - } else { - using_json_data_ = true; - for (const auto& json_file : user_data) { - RETURN_IF_ERROR(data_loader_->ReadDataFromJSON( - parser_->Inputs(), parser_->Outputs(), json_file)); - } - std::cout << " Successfully read data for " - << data_loader_->GetDataStreamsCount() << " stream/streams"; - if (data_loader_->GetDataStreamsCount() == 1) { - std::cout << " with " << data_loader_->GetTotalSteps(0) - << " step/steps"; - } - std::cout << "." 
<< std::endl; - } - } else { - RETURN_IF_ERROR(data_loader_->GenerateData( - parser_->Inputs(), zero_input, string_length, string_data)); - } - - // Reserve the required vector space - threads_stat_.reserve(max_threads_); - - return cb::Error::Success; -} - -void -LoadManager::StopWorkerThreads() -{ - early_exit = true; - // wake up all threads - wake_signal_.notify_all(); - - size_t cnt = 0; - for (auto& thread : threads_) { - thread.join(); - if (!threads_stat_[cnt]->status_.IsOk()) { - std::cerr << "Thread [" << cnt - << "] had error: " << (threads_stat_[cnt]->status_) - << std::endl; - } - if (!threads_stat_[cnt]->cb_status_.IsOk()) { - std::cerr << "Thread [" << cnt - << "] had error: " << (threads_stat_[cnt]->cb_status_) - << std::endl; - } - cnt++; - } - threads_.clear(); -} - -std::shared_ptr -LoadManager::MakeSequenceManager( - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation, const bool using_json_data, - std::shared_ptr data_loader) -{ - return std::make_shared( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/load_manager.h b/src/c++/perf_analyzer/load_manager.h deleted file mode 100644 index 799bfa75f..000000000 --- a/src/c++/perf_analyzer/load_manager.h +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
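load_manager.cc above exposes the per-window accessors (SwapRequestRecords, GetIdleTime, GetAndResetNumSentRequests) that the removed profiler drains between measurement windows. A minimal sketch of that consumption pattern, assuming only the LoadManager API visible in this diff; the CollectWindow function, the summarizing step, and the omitted headers are illustrative, not code from the repository:

// Illustrative caller of the removed LoadManager accessors (headers omitted).
void CollectWindow(triton::perfanalyzer::LoadManager& manager)
{
  // SwapRequestRecords() gathers every worker thread's records and swaps
  // them into the (empty) vector we pass in, clearing the per-thread lists.
  std::vector<triton::perfanalyzer::RequestRecord> window_records;
  manager.SwapRequestRecords(window_records);

  // Average idle time per active worker thread, in nanoseconds.
  uint64_t avg_idle_ns = manager.GetIdleTime();

  // Requests issued since the last window; the call also resets the counters.
  size_t sent_requests = manager.GetAndResetNumSentRequests();

  manager.ResetIdleTime();  // start idle accounting for the next window

  // ... summarize window_records, avg_idle_ns, sent_requests ...
}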
-#pragma once - -#include -#include -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "data_loader.h" -#include "iinfer_data_manager.h" -#include "load_worker.h" -#include "perf_utils.h" -#include "sequence_manager.h" - -namespace triton { namespace perfanalyzer { - - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockLoadManager; -#endif - -class LoadManager { - public: - virtual ~LoadManager() = default; - - /// Initialize the Manager class to set up shared memory and inputs - /// \param string_length The length of the random strings to be generated - /// for string inputs. - /// \param string_data The string to be used as string inputs for model. - /// \param zero_input Whether to use zero for model inputs. - /// \param user_data The vector containing path/paths to user-provided data - /// that can be a directory or path to a json data file. - /// \param start_sequence_id The starting sequence ID to be used for iterating - /// through valid sequence IDs. - /// \param sequence_id_range The maximum sequence ID to be used for iterating - /// through valid sequence IDs. - /// \param sequence_length The base length of new sequences. - /// \param sequence_length_specified Whether the user specified the sequence - /// length. - /// \param sequence_length_variation The percentage variation in length of - /// sequences using autogenerated data as input. - void InitManager( - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation); - - /// Check if the load manager is working as expected. - /// \return cb::Error object indicating success or failure. - cb::Error CheckHealth(); - - /// Swap the content of the request records vector recorded by the load - /// manager with a new request records vector - /// \param new_request_records The request records vector to be swapped. - /// \return cb::Error object indicating success or failure. - cb::Error SwapRequestRecords(std::vector& new_request_records); - - /// Get the sum of all contexts' stat - /// \param contexts_stat Returned the accumulated stat from all contexts - /// in load manager - cb::Error GetAccumulatedClientStat(cb::InferStat* contexts_stat); - - /// Returns the amount of valid time each worker thread has averaged in - /// nanoseconds - /// - uint64_t GetIdleTime(); - - /// Resets the counter for tracking valid time - /// - void ResetIdleTime(); - - /// Calculates and returns the total number of sent requests across all - /// threads. Resets individual number of sent requests per thread. - /// \return The total number of sent requests across all threads. - const size_t GetAndResetNumSentRequests(); - - /// \return the batch size used for the inference requests - virtual size_t BatchSize() const { return batch_size_; } - - /// Count the number of requests collected until now. - uint64_t CountCollectedRequests(); - - protected: - LoadManager( - const bool async, const bool streaming, const int32_t batch_size, - const size_t max_threads, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters); - - /// Complete any subclass-specific manager initialization tasks. 
- virtual void InitManagerFinalize() {} - - /// Helper function to retrieve the input data for the inferences - /// \param string_length The length of the random strings to be generated - /// for string inputs. - /// \param string_data The string to be used as string inputs for model. - /// \param zero_input Whether to use zero for model inputs. - /// \param user_data The vector containing path/paths to user-provided data - /// that can be a directory or path to a json data file. - /// \return cb::Error object indicating success or failure. - cb::Error InitManagerInputs( - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data); - - /// Stops all the worker threads generating the request load. - void StopWorkerThreads(); - - protected: - bool async_; - bool streaming_; - size_t batch_size_; - size_t max_threads_; - bool on_sequence_model_; - - std::shared_ptr parser_; - std::shared_ptr factory_; - - bool using_json_data_; - - std::shared_ptr data_loader_; - std::unique_ptr backend_; - std::shared_ptr infer_data_manager_; - - // Track the workers so they all go out of scope at the - // same time - std::vector> workers_; - - // Worker threads that loads the server with inferences - std::vector threads_; - // Contains the statistics on the current working threads - std::vector> threads_stat_; - - // Use condition variable to pause/continue worker threads - std::condition_variable wake_signal_; - std::mutex wake_mutex_; - - std::shared_ptr sequence_manager_{nullptr}; - - virtual std::shared_ptr MakeSequenceManager( - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation, const bool using_json_data, - std::shared_ptr data_loader); - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockLoadManager; - - public: - LoadManager() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/load_worker.cc b/src/c++/perf_analyzer/load_worker.cc deleted file mode 100644 index a32976c6a..000000000 --- a/src/c++/perf_analyzer/load_worker.cc +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "load_worker.h" - -#include -#include - -#include "client_backend/client_backend.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -bool -LoadWorker::ShouldExit() -{ - bool bad_status = - !thread_stat_->cb_status_.IsOk() || !thread_stat_->status_.IsOk(); - - bool done_with_request_count = - thread_config_->num_requests_ != 0 && - thread_stat_->num_sent_requests_ >= thread_config_->num_requests_; - - return early_exit || bad_status || done_with_request_count; -} - -bool -LoadWorker::HandleExitConditions() -{ - if (ShouldExit()) { - CompleteOngoingSequences(); - thread_stat_->idle_timer.Start(); - WaitForOngoingRequests(); - return true; - } - return false; -} - -void -LoadWorker::CompleteOngoingSequences() -{ - if (on_sequence_model_) { - for (size_t ctx_id = 0; ctx_id < ctxs_.size(); ++ctx_id) { - size_t seq_stat_index = GetSeqStatIndex(ctx_id); - ctxs_[ctx_id]->CompleteOngoingSequence(seq_stat_index); - } - } -} - -void -LoadWorker::WaitForOngoingRequests() -{ - while (GetNumOngoingRequests() != 0) { - std::this_thread::sleep_for(std::chrono::milliseconds(50)); - } -} - -uint -LoadWorker::GetNumOngoingRequests() -{ - uint num = 0; - for (auto ctx : ctxs_) { - num += ctx->GetNumOngoingRequests(); - } - return num; -} - -void -LoadWorker::CreateContext() -{ - auto ctx = CreateInferContext(); - ctx->Init(); - CreateContextFinalize(ctx); - ctxs_.push_back(ctx); -} - -uint32_t -LoadWorker::GetCtxId() -{ - std::lock_guard lk(cb_mtx_); - return ctx_id_tracker_->Get(); -} - - -void -LoadWorker::RestoreFreeCtxId(uint32_t ctx_id) -{ - if (!async_) { - { - std::lock_guard lock(cb_mtx_); - ctx_id_tracker_->Restore(ctx_id); - } - } -} - -void -LoadWorker::AsyncCallbackFinalize(uint32_t ctx_id) -{ - // avoid competition over 'cb_mtx_' - { - std::lock_guard lk(cb_mtx_); - ctx_id_tracker_->Restore(ctx_id); - notified_ = true; - } - - cb_cv_.notify_all(); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/load_worker.h b/src/c++/perf_analyzer/load_worker.h deleted file mode 100644 index dd7e0297f..000000000 --- a/src/c++/perf_analyzer/load_worker.h +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include - -#include "ctx_id_tracker_factory.h" -#include "data_loader.h" -#include "infer_context.h" -#include "iworker.h" -#include "model_parser.h" -#include "sequence_manager.h" -#include "thread_config.h" - -namespace triton { namespace perfanalyzer { - -/// Abstract base class for worker threads -/// -class LoadWorker : public IWorker { - protected: - LoadWorker( - uint32_t id, std::shared_ptr thread_stat, - std::shared_ptr thread_config, - const std::shared_ptr parser, - std::shared_ptr data_loader, - const std::shared_ptr factory, - const bool on_sequence_model, const bool async, const bool streaming, - const int32_t batch_size, const bool using_json_data, - std::condition_variable& wake_signal, std::mutex& wake_mutex, - bool& execute, - const std::shared_ptr& infer_data_manager, - std::shared_ptr sequence_manager) - : id_(id), thread_stat_(thread_stat), thread_config_(thread_config), - parser_(parser), data_loader_(data_loader), factory_(factory), - on_sequence_model_(on_sequence_model), async_(async), - streaming_(streaming), batch_size_(batch_size), - using_json_data_(using_json_data), wake_signal_(wake_signal), - wake_mutex_(wake_mutex), execute_(execute), - infer_data_manager_(infer_data_manager), - sequence_manager_(sequence_manager) - { - } - - virtual ~LoadWorker() = default; - - protected: - // Return the total number of async requests that have started and not - // finished - uint GetNumOngoingRequests(); - - void SendInferRequest(uint32_t ctx_id, bool delayed = false) - { - if (ShouldExit()) { - return; - } - - if (on_sequence_model_) { - uint32_t seq_stat_index = GetSeqStatIndex(ctx_id); - ctxs_[ctx_id]->SendSequenceInferRequest(seq_stat_index, delayed); - } else { - ctxs_[ctx_id]->SendInferRequest(delayed); - } - } - - virtual std::shared_ptr CreateInferContext() - { - return std::make_shared( - id_, ctxs_.size(), async_, streaming_, on_sequence_model_, - using_json_data_, batch_size_, thread_stat_, data_loader_, parser_, - factory_, execute_, infer_data_manager_, sequence_manager_); - } - - // Create an inference context and add it to ctxs_ - virtual void CreateContext(); - - // Any code that needs to execute after the Context has been created - virtual void CreateContextFinalize(std::shared_ptr ctx) = 0; - - // Detect the cases where this thread needs to exit - bool ShouldExit(); - - // Detect and handle the case where this thread needs to exit - // Returns true if an exit condition was met - bool HandleExitConditions(); - void CompleteOngoingSequences(); - void WaitForOngoingRequests(); - - virtual uint32_t GetSeqStatIndex(uint32_t ctx_id) = 0; - uint32_t GetCtxId(); - void RestoreFreeCtxId(uint32_t ctx_id); - - void AsyncCallbackFinalize(uint32_t 
ctx_id); - - uint32_t id_; - - std::vector> ctxs_; - std::shared_ptr ctx_id_tracker_; - - // Variables used to signal async request completion - bool notified_ = false; - std::mutex cb_mtx_; - std::condition_variable cb_cv_; - - // TODO REFACTOR TMA-1017 is there a better way to do threading than to pass - // the same cv/mutex into every thread by reference? Used to wake up this - // thread if it has been put to sleep - std::condition_variable& wake_signal_; - std::mutex& wake_mutex_; - - // TODO REFACTOR TMA-1017 is there a better way to communicate this than a - // shared bool reference? Used to pause execution of this thread - bool& execute_; - - // Stats for this thread - std::shared_ptr thread_stat_; - // Configuration for this thread - std::shared_ptr thread_config_; - - std::shared_ptr data_loader_; - const std::shared_ptr parser_; - const std::shared_ptr factory_; - const std::shared_ptr infer_data_manager_; - - const bool on_sequence_model_; - const bool async_; - const bool streaming_; - const int32_t batch_size_; - const bool using_json_data_; - - std::shared_ptr sequence_manager_{nullptr}; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/main.cc b/src/c++/perf_analyzer/main.cc deleted file mode 100644 index bf5176294..000000000 --- a/src/c++/perf_analyzer/main.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
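The LoadWorker base class removed above supplies the building blocks (context creation, exit detection, context-id bookkeeping) but leaves the main loop to concrete workers that do not appear in this hunk. A rough, illustrative shape of such a loop; ExampleWorker is a placeholder, and the real ConcurrencyWorker / RequestRateWorker loops add scheduling, pausing, and sequence handling on top of this:

// Illustrative only; not code from the repository.
void ExampleWorker::Infer()
{
  CreateContext();  // populate ctxs_ with at least one InferContext

  while (!HandleExitConditions()) {  // completes sequences + drains requests on exit
    uint32_t ctx_id = GetCtxId();    // claim a free context slot (thread-safe)
    SendInferRequest(ctx_id);        // sequence-aware; sync or async per async_
    RestoreFreeCtxId(ctx_id);        // sync mode only; async callbacks restore it
  }
}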
- -#include "perf_analyzer.h" -#include "perf_analyzer_exception.h" - -namespace pa = triton::perfanalyzer; - -int -main(int argc, char* argv[]) -{ - try { - triton::perfanalyzer::CLParser clp; - pa::PAParamsPtr params = clp.Parse(argc, argv); - - PerfAnalyzer analyzer(params); - analyzer.Run(); - } - catch (pa::PerfAnalyzerException& e) { - std::cerr << e.what() << std::endl; - return e.GetError(); - } - - return 0; -} diff --git a/src/c++/perf_analyzer/metrics.h b/src/c++/perf_analyzer/metrics.h deleted file mode 100644 index 8fbb7584c..000000000 --- a/src/c++/perf_analyzer/metrics.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include - -namespace triton { namespace perfanalyzer { - -/// Struct that holds server-side metrics for the inference server. -/// The keys for each map are GPU UUIDs and the values are described in the -/// variable names. -struct Metrics { - std::map gpu_utilization_per_gpu{}; - std::map gpu_power_usage_per_gpu{}; - std::map gpu_memory_used_bytes_per_gpu{}; - std::map gpu_memory_total_bytes_per_gpu{}; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/metrics_manager.cc b/src/c++/perf_analyzer/metrics_manager.cc deleted file mode 100644 index 0e1262ce3..000000000 --- a/src/c++/perf_analyzer/metrics_manager.cc +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "metrics_manager.h" - -#include -#include -#include - -#include "constants.h" -#include "perf_analyzer_exception.h" - -namespace triton { namespace perfanalyzer { - -MetricsManager::MetricsManager( - std::shared_ptr client_backend, - uint64_t metrics_interval_ms) - : client_backend_(client_backend), metrics_interval_ms_(metrics_interval_ms) -{ -} - -MetricsManager::~MetricsManager() -{ - if (query_loop_future_.valid()) { - StopQueryingMetrics(); - } -} - -void -MetricsManager::StartQueryingMetrics() -{ - should_keep_querying_ = true; - query_loop_future_ = - std::async(&MetricsManager::QueryMetricsEveryNMilliseconds, this); -} - -void -MetricsManager::QueryMetricsEveryNMilliseconds() -{ - while (should_keep_querying_) { - const auto& start{std::chrono::system_clock::now()}; - - Metrics metrics{}; - clientbackend::Error err{client_backend_->Metrics(metrics)}; - if (err.IsOk() == false) { - throw PerfAnalyzerException(err.Message(), err.Err()); - } - - CheckForMissingMetrics(metrics); - - { - std::lock_guard metrics_lock{metrics_mutex_}; - metrics_.push_back(std::move(metrics)); - } - - const auto& end{std::chrono::system_clock::now()}; - const auto& duration{end - start}; - const auto& remainder{ - std::chrono::milliseconds(metrics_interval_ms_) - duration}; - - CheckForMetricIntervalTooShort(remainder, duration); - - { - std::unique_lock query_loop_lock{query_loop_mutex_}; - query_loop_cv_.wait_for(query_loop_lock, remainder); - } - } -} - -void -MetricsManager::CheckForMissingMetrics(const Metrics& metrics) -{ - if (has_given_missing_metrics_warning_) { - return; - } - if (metrics.gpu_utilization_per_gpu.empty()) { - std::cerr << "WARNING: Unable to parse 'nv_gpu_utilization' metric." - << std::endl; - has_given_missing_metrics_warning_ = true; - } - if (metrics.gpu_power_usage_per_gpu.empty()) { - std::cerr << "WARNING: Unable to parse 'nv_gpu_power_usage' metric." - << std::endl; - has_given_missing_metrics_warning_ = true; - } - if (metrics.gpu_memory_used_bytes_per_gpu.empty()) { - std::cerr << "WARNING: Unable to parse 'nv_gpu_memory_used_bytes' metric." - << std::endl; - has_given_missing_metrics_warning_ = true; - } - if (metrics.gpu_memory_total_bytes_per_gpu.empty()) { - std::cerr << "WARNING: Unable to parse 'nv_gpu_memory_total_bytes' metric." 
- << std::endl; - has_given_missing_metrics_warning_ = true; - } -} - -void -MetricsManager::CheckForMetricIntervalTooShort( - const std::chrono::nanoseconds& remainder, - const std::chrono::nanoseconds& duration) -{ - if (has_given_metric_interval_warning_) { - return; - } - if (remainder < std::chrono::nanoseconds::zero()) { - std::cerr << "WARNING: Triton metrics endpoint latency (" - << std::chrono::duration_cast(duration) - .count() - << "ms) is larger than the querying interval (" - << metrics_interval_ms_ - << "ms). Please try a larger querying interval " - "via `--triton-metrics-interval`." - << std::endl; - has_given_metric_interval_warning_ = true; - } -} - -void -MetricsManager::CheckQueryingStatus() -{ - if (query_loop_future_.valid() && - query_loop_future_.wait_for(std::chrono::seconds(0)) == - std::future_status::ready) { - query_loop_future_.get(); - } -} - -void -MetricsManager::GetLatestMetrics(std::vector& metrics) -{ - if (metrics.empty() == false) { - throw PerfAnalyzerException( - "MetricsManager::GetLatestMetrics() must be passed an empty vector.", - GENERIC_ERROR); - } - std::lock_guard metrics_lock{metrics_mutex_}; - metrics_.swap(metrics); -} - -void -MetricsManager::StopQueryingMetrics() -{ - should_keep_querying_ = false; - query_loop_cv_.notify_one(); - if (query_loop_future_.valid()) { - query_loop_future_.get(); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/metrics_manager.h b/src/c++/perf_analyzer/metrics_manager.h deleted file mode 100644 index ae6b6135f..000000000 --- a/src/c++/perf_analyzer/metrics_manager.h +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
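metrics_manager.cc above implements the background polling loop; from the caller's side the lifecycle is start, periodically drain, then stop. A sketch of that usage, assuming "backend" is a client-backend shared pointer created elsewhere and with error handling omitted (illustrative, not code from the repository):

// Illustrative usage of the removed MetricsManager.
auto metrics_manager = std::make_shared<triton::perfanalyzer::MetricsManager>(
    backend, /*metrics_interval_ms=*/1000);

metrics_manager->StartQueryingMetrics();   // spawns the polling std::async task

// ... run a measurement window ...

metrics_manager->CheckQueryingStatus();    // rethrows any exception from the loop
std::vector<triton::perfanalyzer::Metrics> window_metrics;  // must be empty
metrics_manager->GetLatestMetrics(window_metrics);          // swaps collected samples out

metrics_manager->StopQueryingMetrics();    // signals and joins the background task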
-#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "metrics.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestMetricsManager; -#endif - -class MetricsManager { - public: - MetricsManager( - std::shared_ptr client_backend, - uint64_t metrics_interval_ms); - - /// Ends the background thread, redundant in case StopQueryingMetrics() isn't - /// called - ~MetricsManager(); - - /// Starts background thread that queries metrics on an interval - void StartQueryingMetrics(); - - /// Checks if background thread threw exception and propagates it if so - void CheckQueryingStatus(); - - /// Puts the latest-collected metrics from background thread into vector - /// output parameter to be used by main thread - void GetLatestMetrics(std::vector& metrics_per_timestamp); - - /// Ends the background thread - void StopQueryingMetrics(); - - private: - void QueryMetricsEveryNMilliseconds(); - void CheckForMissingMetrics(const Metrics& metrics); - void CheckForMetricIntervalTooShort( - const std::chrono::nanoseconds& remainder, - const std::chrono::nanoseconds& duration); - - std::shared_ptr client_backend_{nullptr}; - uint64_t metrics_interval_ms_{0}; - std::mutex metrics_mutex_{}; - std::vector metrics_{}; - bool should_keep_querying_{false}; - std::future query_loop_future_{}; - std::mutex query_loop_mutex_{}; - std::condition_variable query_loop_cv_{}; - bool has_given_missing_metrics_warning_{false}; - bool has_given_metric_interval_warning_{false}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestMetricsManager; - - public: - MetricsManager() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_concurrency_worker.h b/src/c++/perf_analyzer/mock_concurrency_worker.h deleted file mode 100644 index 636b92743..000000000 --- a/src/c++/perf_analyzer/mock_concurrency_worker.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include "concurrency_worker.h" -#include "gmock/gmock.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockConcurrencyWorker : public ConcurrencyWorker { - public: - NaggyMockConcurrencyWorker( - uint32_t id, std::shared_ptr thread_stat, - std::shared_ptr thread_config, - const std::shared_ptr parser, - std::shared_ptr data_loader, - const std::shared_ptr factory, - const bool on_sequence_model, const bool async, - const size_t max_concurrency, const bool using_json_data, - const bool streaming, const int32_t batch_size, - std::condition_variable& wake_signal, std::mutex& wake_mutex, - size_t& active_threads, bool& execute, - const std::shared_ptr& infer_data_manager, - std::shared_ptr sequence_manager) - : ConcurrencyWorker( - id, thread_stat, thread_config, parser, data_loader, factory, - on_sequence_model, async, max_concurrency, using_json_data, - streaming, batch_size, wake_signal, wake_mutex, active_threads, - execute, infer_data_manager, sequence_manager) - { - ON_CALL(*this, Infer()).WillByDefault([this]() -> void { - ConcurrencyWorker::Infer(); - }); - } - - MOCK_METHOD(void, Infer, (), (override)); - - void EmptyInfer() { thread_config_->is_paused_ = true; } -}; - -// Non-naggy version of Mock (won't warn when using default gmock -// mocked function) -using MockConcurrencyWorker = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_data_loader.h b/src/c++/perf_analyzer/mock_data_loader.h deleted file mode 100644 index 0eccdabff..000000000 --- a/src/c++/perf_analyzer/mock_data_loader.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "data_loader.h" -#include "gmock/gmock.h" - -namespace triton { namespace perfanalyzer { - -/// Mock DataLoader class used for testing to allow JSON data to be read -/// from string, rather than file. -/// -class NaggyMockDataLoader : public DataLoader { - public: - NaggyMockDataLoader() { SetupMocks(); } - NaggyMockDataLoader(size_t batch_size) : DataLoader(batch_size) - { - SetupMocks(); - } - - void SetupMocks() - { - ON_CALL(*this, GetTotalSteps(testing::_)) - .WillByDefault([this](size_t stream_id) -> size_t { - return this->DataLoader::GetTotalSteps(stream_id); - }); - ON_CALL(*this, ReadFile(testing::_, testing::_)) - .WillByDefault( - [this]( - const std::string& path, - std::vector* contents) -> cb::Error { - return this->DataLoader::ReadFile(path, contents); - }); - ON_CALL(*this, ReadTextFile(testing::_, testing::_)) - .WillByDefault( - [this]( - const std::string& path, - std::vector* contents) -> cb::Error { - return this->DataLoader::ReadTextFile(path, contents); - }); - } - - MOCK_METHOD(size_t, GetTotalSteps, (size_t), (override)); - MOCK_METHOD(cb::Error, ReadFile, (const std::string&, std::vector*)); - MOCK_METHOD( - cb::Error, ReadTextFile, (const std::string&, std::vector*)); - - cb::Error ReadDataFromJSON( - const std::shared_ptr& inputs, - const std::shared_ptr& outputs, - const std::string& json_file) override - { - return ReadDataFromStr(json_file, inputs, outputs); - } - - cb::Error ReadDataFromStr( - const std::string& str, const std::shared_ptr& inputs, - const std::shared_ptr& outputs) - { - rapidjson::Document d{}; - const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag; - d.Parse(str.c_str()); - - return ParseData(d, inputs, outputs); - }; - - std::vector& step_num_{DataLoader::step_num_}; - size_t& data_stream_cnt_{DataLoader::data_stream_cnt_}; -}; - -// Non-naggy version of Mock Data Loader (won't warn when using default gmock -// mocked function) -using MockDataLoader = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_infer_context.h b/src/c++/perf_analyzer/mock_infer_context.h deleted file mode 100644 index e1c15d03c..000000000 --- a/src/c++/perf_analyzer/mock_infer_context.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "gmock/gmock.h" -#include "infer_context.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockInferContext : public InferContext { - public: - NaggyMockInferContext() - { - ON_CALL(*this, SendRequest(testing::_, testing::_, testing::_)) - .WillByDefault( - [this]( - const uint64_t request_id, const bool delayed, - const uint64_t sequence_id) -> void { - this->InferContext::SendRequest(request_id, delayed, sequence_id); - }); - } - - MOCK_METHOD( - void, SendRequest, (const uint64_t, const bool, const uint64_t), - (override)); - - std::shared_ptr& sequence_manager_{ - InferContext::sequence_manager_}; - std::shared_ptr& data_loader_{InferContext::data_loader_}; - std::shared_ptr& infer_data_manager_{ - InferContext::infer_data_manager_}; - std::shared_ptr& thread_stat_{InferContext::thread_stat_}; - std::reference_wrapper& execute_{InferContext::execute_}; - bool& using_json_data_{InferContext::using_json_data_}; - bool& async_{InferContext::async_}; - bool& streaming_{InferContext::streaming_}; - InferData& infer_data_{InferContext::infer_data_}; - std::unique_ptr& infer_backend_{ - InferContext::infer_backend_}; - std::function& async_callback_func_{ - InferContext::async_callback_func_}; -}; - -using MockInferContext = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_infer_data_manager.h b/src/c++/perf_analyzer/mock_infer_data_manager.h deleted file mode 100644 index 8f9cd7ec0..000000000 --- a/src/c++/perf_analyzer/mock_infer_data_manager.h +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "gmock/gmock.h" -#include "infer_data_manager.h" -#include "infer_data_manager_shm.h" -#include "mock_client_backend.h" - -namespace triton { namespace perfanalyzer { - - -class MockInferDataManagerShm : public InferDataManagerShm { - public: - MockInferDataManagerShm( - const int32_t batch_size, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, - std::unordered_map - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - : InferDataManagerShm( - batch_size, shared_memory_type, output_shm_size, request_parameters, - parser, factory, data_loader) - { - } - - // Mocked version of the CopySharedMemory method in loadmanager. - // Tracks the mapping of shared memory label to data - // - cb::Error CopySharedMemory( - uint8_t* input_shm_ptr, const std::vector& input_datas, - bool is_shape_tensor, std::string& region_name) override - { - std::vector vals; - - for (size_t i = 0; i < input_datas.size(); i++) { - int32_t val = *reinterpret_cast(input_datas[i].data_ptr); - vals.push_back(val); - } - mocked_shared_memory_regions.insert(std::make_pair(region_name, vals)); - return cb::Error::Success; - } - - cb::Error CreateInferInput( - cb::InferInput** infer_input, const cb::BackendKind kind, - const std::string& name, const std::vector& dims, - const std::string& datatype) override - { - *infer_input = new cb::MockInferInput(kind, name, dims, datatype); - return cb::Error::Success; - } - - // Tracks the mapping of shared memory label to data - std::map> mocked_shared_memory_regions; -}; - - -class MockInferDataManager : public InferDataManager { - public: - MockInferDataManager() { SetupMocks(); } - - MockInferDataManager( - const size_t max_threads, const int32_t batch_size, - std::unordered_map - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - : InferDataManager( - max_threads, batch_size, request_parameters, parser, factory, - data_loader) - { - SetupMocks(); - } - - void SetupMocks() - { - ON_CALL( - *this, UpdateInferData(testing::_, testing::_, testing::_, testing::_)) - .WillByDefault( - [this]( - size_t thread_id, int stream_index, int step_index, - InferData& infer_data) -> cb::Error { - return this->InferDataManager::UpdateInferData( - thread_id, stream_index, step_index, infer_data); - }); - } - - MOCK_METHOD( - cb::Error, UpdateInferData, (size_t, int, int, InferData&), (override)); - - cb::Error CreateInferInput( - cb::InferInput** infer_input, const cb::BackendKind kind, - const std::string& name, const std::vector& dims, - const std::string& datatype) override - { - *infer_input = new cb::MockInferInput(kind, name, 
dims, datatype); - return cb::Error::Success; - } -}; - -class MockInferDataManagerFactory { - public: - static std::shared_ptr CreateMockInferDataManager( - const size_t max_threads, const int32_t batch_size, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - std::unordered_map - request_parameters, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::shared_ptr& data_loader) - { - if (shared_memory_type == SharedMemoryType::NO_SHARED_MEMORY) { - return std::make_shared>( - max_threads, batch_size, request_parameters, parser, factory, - data_loader); - } else { - return std::make_shared>( - batch_size, shared_memory_type, output_shm_size, request_parameters, - parser, factory, data_loader); - } - } -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_inference_profiler.h b/src/c++/perf_analyzer/mock_inference_profiler.h deleted file mode 100644 index 7e08e489b..000000000 --- a/src/c++/perf_analyzer/mock_inference_profiler.h +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
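For reference, the CopySharedMemory override in the mock above illustrates a common recording-test-double technique: rather than writing bytes into a real shared-memory region, the mock decodes each input and stores it under the region name so tests can assert on what would have been written. A minimal self-contained sketch of that idea, using hypothetical types (InputData, RecordingShmWriter) that stand in for the Perf Analyzer ones:

// Recording test double: capture "writes" into a map instead of real shared memory.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct InputData {
  const uint8_t* data_ptr;
  size_t byte_size;
};

class RecordingShmWriter {
 public:
  void CopySharedMemory(
      const std::vector<InputData>& inputs, const std::string& region_name)
  {
    std::vector<int32_t> vals;
    for (const auto& input : inputs) {
      int32_t val;
      std::memcpy(&val, input.data_ptr, sizeof(val));  // decode one int32 per input
      vals.push_back(val);
    }
    regions_[region_name] = vals;  // record what would have been written
  }

  std::map<std::string, std::vector<int32_t>> regions_;
};

int main()
{
  int32_t raw[2] = {7, 42};
  RecordingShmWriter writer;
  writer.CopySharedMemory(
      {{reinterpret_cast<const uint8_t*>(&raw[0]), sizeof(int32_t)},
       {reinterpret_cast<const uint8_t*>(&raw[1]), sizeof(int32_t)}},
      "region_0");
  for (int32_t v : writer.regions_["region_0"]) {
    std::cout << v << " ";  // prints "7 42"
  }
  std::cout << std::endl;
  return 0;
}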
-#pragma once - -#include "gmock/gmock.h" -#include "inference_profiler.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockInferenceProfiler : public InferenceProfiler { - public: - NaggyMockInferenceProfiler() - { - ON_CALL( - *this, ValidLatencyMeasurement( - testing::_, testing::_, testing::_, testing::_, testing::_, - testing::_)) - .WillByDefault( - [this]( - const std::pair& valid_range, - size_t& valid_sequence_count, size_t& delayed_request_count, - std::vector* latencies, size_t& response_count, - std::vector& valid_requests) -> void { - this->InferenceProfiler::ValidLatencyMeasurement( - valid_range, valid_sequence_count, delayed_request_count, - latencies, response_count, valid_requests); - }); - ON_CALL(*this, SummarizeLatency(testing::_, testing::_)) - .WillByDefault( - [this]( - const std::vector& latencies, - PerfStatus& summary) -> cb::Error { - return this->InferenceProfiler::SummarizeLatency( - latencies, summary); - }); - ON_CALL(*this, MergePerfStatusReports(testing::_, testing::_)) - .WillByDefault( - [this]( - std::deque& perf_status, - PerfStatus& summary_status) -> cb::Error { - return this->InferenceProfiler::MergePerfStatusReports( - perf_status, summary_status); - }); - ON_CALL(*this, MergeServerSideStats(testing::_, testing::_)) - .WillByDefault( - [this]( - std::vector& server_side_stats, - ServerSideStats& server_side_summary) -> cb::Error { - return this->InferenceProfiler::MergeServerSideStats( - server_side_stats, server_side_summary); - }); - ON_CALL( - *this, SummarizeClientStat( - testing::_, testing::_, testing::_, testing::_, testing::_, - testing::_, testing::_, testing::_)) - .WillByDefault( - [this]( - const cb::InferStat& start_stat, const cb::InferStat& end_stat, - const uint64_t duration_ns, const size_t valid_request_count, - const size_t delayed_request_count, - const size_t valid_sequence_count, const size_t response_count, - PerfStatus& summary) -> cb::Error { - return this->InferenceProfiler::SummarizeClientStat( - start_stat, end_stat, duration_ns, valid_request_count, - delayed_request_count, valid_sequence_count, response_count, - summary); - }); - }; - - MOCK_METHOD0(IncludeServerStats, bool()); - MOCK_METHOD( - void, ValidLatencyMeasurement, - ((const std::pair&), size_t&, size_t&, - std::vector*, size_t&, std::vector&), - (override)); - MOCK_METHOD( - cb::Error, SummarizeLatency, (const std::vector&, PerfStatus&), - (override)); - MOCK_METHOD( - cb::Error, MergePerfStatusReports, (std::deque&, PerfStatus&), - (override)); - MOCK_METHOD( - cb::Error, MergeServerSideStats, - (std::vector&, ServerSideStats&), (override)); - MOCK_METHOD( - cb::Error, SummarizeClientStat, - (const cb::InferStat&, const cb::InferStat&, const uint64_t, const size_t, - const size_t, const size_t, const size_t, PerfStatus&), - (override)); - - std::shared_ptr& parser_{InferenceProfiler::parser_}; - std::unique_ptr& manager_{InferenceProfiler::manager_}; - bool& include_lib_stats_{InferenceProfiler::include_lib_stats_}; - std::vector& all_request_records_{ - InferenceProfiler::all_request_records_}; -}; - -using MockInferenceProfiler = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_load_manager.h b/src/c++/perf_analyzer/mock_load_manager.h deleted file mode 100644 index 2088a4053..000000000 --- a/src/c++/perf_analyzer/mock_load_manager.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2023 (c), NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "gmock/gmock.h" -#include "load_manager.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockLoadManager : public LoadManager {}; - -using MockLoadManager = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_model_parser.h b/src/c++/perf_analyzer/mock_model_parser.h deleted file mode 100644 index 72222a826..000000000 --- a/src/c++/perf_analyzer/mock_model_parser.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include "model_parser.h" - -namespace triton { namespace perfanalyzer { - -class MockModelParser : public ModelParser { - public: - MockModelParser() : ModelParser(clientbackend::BackendKind::TRITON) {} - - MockModelParser( - bool is_sequence_model, bool is_decoupled_model, - size_t max_batch_size = 64) - : ModelParser(clientbackend::BackendKind::TRITON) - { - if (is_sequence_model) { - scheduler_type_ = ModelParser::SEQUENCE; - } - is_decoupled_ = is_decoupled_model; - max_batch_size_ = max_batch_size; - } - - // Expose private function - cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value) - { - return ModelParser::GetInt(value, integer_value); - } - - // Expose private function - cb::Error DetermineComposingModelMap( - const std::vector& bls_composing_models, - const rapidjson::Document& config, - std::unique_ptr& backend) - { - return ModelParser::DetermineComposingModelMap( - bls_composing_models, config, backend); - } - - // Expose private function - cb::Error DetermineSchedulerType( - const rapidjson::Document& config, - std::unique_ptr& backend) - { - return ModelParser::DetermineSchedulerType(config, backend); - } - - std::shared_ptr& composing_models_map_{ - ModelParser::composing_models_map_}; - std::shared_ptr& inputs_{ModelParser::inputs_}; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_profile_data_collector.h b/src/c++/perf_analyzer/mock_profile_data_collector.h deleted file mode 100644 index 94467892d..000000000 --- a/src/c++/perf_analyzer/mock_profile_data_collector.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "gmock/gmock.h" -#include "profile_data_collector.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockProfileDataCollector : public ProfileDataCollector { - public: - NaggyMockProfileDataCollector() - { - ON_CALL(*this, FindExperiment(testing::_)) - .WillByDefault( - [this](InferenceLoadMode& id) -> std::vector::iterator { - return this->ProfileDataCollector::FindExperiment(id); - }); - } - - MOCK_METHOD( - std::vector::iterator, FindExperiment, (InferenceLoadMode&), - (override)); - - std::vector& experiments_{ProfileDataCollector::experiments_}; -}; - -using MockProfileDataCollector = - testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_profile_data_exporter.h b/src/c++/perf_analyzer/mock_profile_data_exporter.h deleted file mode 100644 index 90e96d736..000000000 --- a/src/c++/perf_analyzer/mock_profile_data_exporter.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
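The NaggyMock*/NiceMock pairs in these deleted headers all follow the same gMock idiom: MOCK_METHOD overrides a virtual method, ON_CALL in the constructor delegates back to the real base implementation by default, and a testing::NiceMock alias suppresses "uninteresting call" warnings. A minimal sketch of the idiom with a hypothetical Calculator class (not part of Perf Analyzer):

#include <iostream>

#include "gmock/gmock.h"

class Calculator {
 public:
  virtual ~Calculator() = default;
  virtual int Add(int a, int b) { return a + b; }
};

class NaggyMockCalculator : public Calculator {
 public:
  NaggyMockCalculator()
  {
    // By default, forward to the real implementation so behavior is unchanged
    // unless a test installs its own action via EXPECT_CALL/WillOnce.
    ON_CALL(*this, Add(testing::_, testing::_))
        .WillByDefault(
            [this](int a, int b) { return this->Calculator::Add(a, b); });
  }

  MOCK_METHOD(int, Add, (int, int), (override));
};

// Non-naggy version: won't warn when a mocked function runs its default action.
using MockCalculator = testing::NiceMock<NaggyMockCalculator>;

int main(int argc, char** argv)
{
  testing::InitGoogleMock(&argc, argv);
  MockCalculator calc;
  std::cout << calc.Add(2, 3) << std::endl;  // prints 5 via the delegated default
  return 0;
}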
-#pragma once - -#include "gmock/gmock.h" -#include "profile_data_exporter.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockProfileDataExporter : public ProfileDataExporter { - public: - NaggyMockProfileDataExporter() - { - ON_CALL( - *this, ConvertToJson(testing::_, testing::_, testing::_, testing::_)) - .WillByDefault( - [this]( - const std::vector& raw_experiments, - std::string& raw_version, cb::BackendKind& service_kind, - std::string& endpoint) -> void { - return this->ProfileDataExporter::ConvertToJson( - raw_experiments, raw_version, service_kind, endpoint); - }); - - ON_CALL(*this, OutputToFile(testing::_)) - .WillByDefault([this](std::string& file_path) -> void { - this->ProfileDataExporter::OutputToFile(file_path); - }); - - ON_CALL(*this, AddExperiment(testing::_, testing::_, testing::_)) - .WillByDefault( - [this]( - rapidjson::Value& entry, rapidjson::Value& experiment, - const Experiment& raw_experiment) -> void { - this->ProfileDataExporter::AddExperiment( - entry, experiment, raw_experiment); - }); - - ON_CALL(*this, AddServiceKind(testing::_)) - .WillByDefault([this](cb::BackendKind& service_kind) -> void { - this->ProfileDataExporter::AddServiceKind(service_kind); - }); - - ON_CALL(*this, AddEndpoint(testing::_)) - .WillByDefault([this](std::string& endpoint) -> void { - this->ProfileDataExporter::AddEndpoint(endpoint); - }); - - ON_CALL(*this, ClearDocument()).WillByDefault([this]() -> void { - this->ProfileDataExporter::ClearDocument(); - }); - } - - MOCK_METHOD( - void, ConvertToJson, - (const std::vector&, std::string&, cb::BackendKind&, - std::string&), - (override)); - MOCK_METHOD( - void, AddExperiment, - (rapidjson::Value&, rapidjson::Value&, const Experiment&), (override)); - MOCK_METHOD(void, OutputToFile, (std::string&), (override)); - MOCK_METHOD(void, AddServiceKind, (cb::BackendKind&)); - MOCK_METHOD(void, AddEndpoint, (std::string&)); - MOCK_METHOD(void, ClearDocument, ()); - - rapidjson::Document& document_{ProfileDataExporter::document_}; -}; - -using MockProfileDataExporter = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_request_rate_worker.h b/src/c++/perf_analyzer/mock_request_rate_worker.h deleted file mode 100644 index 0132a9a0b..000000000 --- a/src/c++/perf_analyzer/mock_request_rate_worker.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include "gmock/gmock.h" -#include "request_rate_worker.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockRequestRateWorker : public RequestRateWorker { - public: - NaggyMockRequestRateWorker( - uint32_t id, std::shared_ptr thread_stat, - std::shared_ptr thread_config, - const std::shared_ptr parser, - std::shared_ptr data_loader, - const std::shared_ptr factory, - const bool on_sequence_model, const bool async, const size_t max_threads, - const bool using_json_data, const bool streaming, - const int32_t batch_size, std::condition_variable& wake_signal, - std::mutex& wake_mutex, bool& execute, - std::chrono::steady_clock::time_point& start_time, - const bool serial_sequences, - const std::shared_ptr& infer_data_manager, - std::shared_ptr sequence_manager) - : RequestRateWorker( - id, thread_stat, thread_config, parser, data_loader, factory, - on_sequence_model, async, max_threads, using_json_data, streaming, - batch_size, wake_signal, wake_mutex, execute, start_time, - serial_sequences, infer_data_manager, sequence_manager) - { - ON_CALL(*this, Infer()).WillByDefault([this]() -> void { - RequestRateWorker::Infer(); - }); - } - - MOCK_METHOD(void, Infer, (), (override)); - - void CreateContext() override { RequestRateWorker::CreateContext(); } - - void SendInferRequest() - { - if (thread_stat_->status_.IsOk()) { - LoadWorker::SendInferRequest(0, false); - } - } - - void EmptyInfer() { thread_config_->is_paused_ = true; } -}; - -// Non-naggy version of Mock (won't warn when using default gmock -// mocked function) -using MockRequestRateWorker = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mock_sequence_manager.h b/src/c++/perf_analyzer/mock_sequence_manager.h deleted file mode 100644 index 522079c13..000000000 --- a/src/c++/perf_analyzer/mock_sequence_manager.h +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "gmock/gmock.h" -#include "sequence_manager.h" - -namespace triton { namespace perfanalyzer { - -class NaggyMockSequenceManager : public SequenceManager { - public: - NaggyMockSequenceManager() { SetupMocks(); } - - NaggyMockSequenceManager( - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation, const bool using_json_data, - std::shared_ptr data_loader) - : SequenceManager( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, - using_json_data, data_loader) - { - SetupMocks(); - } - - void SetupMocks() - { - ON_CALL(*this, SetInferSequenceOptions(testing::_, testing::_)) - .WillByDefault([this]( - const uint32_t seq_stat_index, - std::unique_ptr& options) { - this->SequenceManager::SetInferSequenceOptions( - seq_stat_index, options); - }); - ON_CALL(*this, InitNewSequence(testing::_)) - .WillByDefault([this](int seq_stat_index) { - this->SequenceManager::InitNewSequence(seq_stat_index); - }); - ON_CALL(*this, GetNextSeqId(testing::_)) - .WillByDefault([this](int seq_stat_index) -> uint64_t { - return this->SequenceManager::GetNextSeqId(seq_stat_index); - }); - ON_CALL(*this, GetRandomSequenceLength(testing::_)) - .WillByDefault([this](double offset_ratio) -> size_t { - return this->SequenceManager::GetRandomSequenceLength(offset_ratio); - }); - ON_CALL(*this, GetNewDataStreamId()).WillByDefault([this]() -> size_t { - return this->SequenceManager::GetNewDataStreamId(); - }); - } - - MOCK_METHOD( - void, SetInferSequenceOptions, - (const uint32_t, std::unique_ptr&), (override)); - MOCK_METHOD(void, InitNewSequence, (int), (override)); - MOCK_METHOD(uint64_t, GetNextSeqId, (int), (override)); - MOCK_METHOD(size_t, GetRandomSequenceLength, (double), (override)); - MOCK_METHOD(uint64_t, GetNewDataStreamId, (), (override)); - - std::vector>& sequence_statuses_{ - SequenceManager::sequence_statuses_}; - std::atomic& curr_seq_id_{SequenceManager::curr_seq_id_}; -}; - -using MockSequenceManager = testing::NiceMock; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/model_parser.cc b/src/c++/perf_analyzer/model_parser.cc deleted file mode 100644 index 8ffea56da..000000000 --- a/src/c++/perf_analyzer/model_parser.cc +++ /dev/null @@ -1,467 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "model_parser.h" - -#include "rapidjson/writer.h" - -namespace triton { namespace perfanalyzer { - -cb::Error -ModelParser::InitTriton( - const rapidjson::Document& metadata, const rapidjson::Document& config, - const std::string& model_version, - const std::vector& bls_composing_models, - const std::unordered_map>& input_shapes, - std::unique_ptr& backend) -{ - model_name_ = metadata["name"].GetString(); - model_version_ = model_version; - - RETURN_IF_ERROR( - DetermineComposingModelMap(bls_composing_models, config, backend)); - - RETURN_IF_ERROR(DetermineSchedulerType(config, backend)); - - max_batch_size_ = 0; - const auto bs_itr = config.FindMember("max_batch_size"); - if (bs_itr != config.MemberEnd()) { - int64_t mbs; - RETURN_IF_ERROR(GetInt(bs_itr->value, &mbs)); - max_batch_size_ = mbs; - } - - const auto txn_itr = config.FindMember("model_transaction_policy"); - if (txn_itr != config.MemberEnd()) { - is_decoupled_ = txn_itr->value["decoupled"].GetBool(); - } - - // Get the information about inputs from metadata - const auto inputs_itr = metadata.FindMember("inputs"); - if (inputs_itr != metadata.MemberEnd()) { - for (const auto& input : inputs_itr->value.GetArray()) { - auto it = - inputs_->emplace(input["name"].GetString(), ModelTensor()).first; - it->second.name_ = input["name"].GetString(); - it->second.datatype_ = input["datatype"].GetString(); - bool is_dynamic = false; - bool skip = (max_batch_size_ > 0); - for (const auto& dim : input["shape"].GetArray()) { - if (skip) { - skip = false; - continue; - } - int64_t dim_int; - RETURN_IF_ERROR(GetInt(dim, &dim_int)); - if (dim_int == -1) { - is_dynamic = true; - } - it->second.shape_.push_back(dim_int); - } - - if (is_dynamic) { - const auto user_shape_it = input_shapes.find(it->second.name_); - if (user_shape_it != input_shapes.end()) { - // Update the default shape to be used. - it->second.shape_.clear(); - for (const auto dim : user_shape_it->second) { - it->second.shape_.push_back(dim); - } - } - } - } - } - - // Check whether the tensor is shape tensor or not from config. 
- const auto inputs_config_itr = config.FindMember("input"); - if (inputs_config_itr != config.MemberEnd()) { - for (const auto& input_config : inputs_config_itr->value.GetArray()) { - const auto name = std::string( - input_config["name"].GetString(), - input_config["name"].GetStringLength()); - auto it = inputs_->find(name); - if (it == inputs_->end()) { - return cb::Error( - "no metadata found for input tensor " + name, pa::GENERIC_ERROR); - } - const auto& shape_tensor_itr = input_config.FindMember("is_shape_tensor"); - if (shape_tensor_itr != input_config.MemberEnd()) { - it->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool(); - } - - if (input_config.HasMember("optional")) { - it->second.is_optional_ = input_config["optional"].GetBool(); - } else { - it->second.is_optional_ = false; - } - } - } - - // Get the information about outputs from metadata - const auto outputs_itr = metadata.FindMember("outputs"); - if (outputs_itr != metadata.MemberEnd()) { - for (const auto& output : outputs_itr->value.GetArray()) { - auto it = - outputs_->emplace(output["name"].GetString(), ModelTensor()).first; - it->second.name_ = output["name"].GetString(); - it->second.datatype_ = output["datatype"].GetString(); - bool skip = (max_batch_size_ > 0); - for (const auto& dim : output["shape"].GetArray()) { - if (skip) { - skip = false; - continue; - } - int64_t dim_int; - RETURN_IF_ERROR(GetInt(dim, &dim_int)); - it->second.shape_.push_back(dim_int); - } - } - } - - // Check whether the tensor is shape tensor or not from config. - const auto output_config_itr = config.FindMember("output"); - if (output_config_itr != config.MemberEnd()) { - for (const auto& output_config : output_config_itr->value.GetArray()) { - const auto name = std::string( - output_config["name"].GetString(), - output_config["name"].GetStringLength()); - auto itr = outputs_->find(name); - if (itr == outputs_->end()) { - return cb::Error( - "no metadata found for output tensor " + name, pa::GENERIC_ERROR); - } - const auto& shape_tensor_itr = - output_config.FindMember("is_shape_tensor"); - if (shape_tensor_itr != output_config.MemberEnd()) { - itr->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool(); - } - } - } - - // Check if model has response caching enabled - const auto cache_itr = config.FindMember("response_cache"); - // response_cache_enabled_ set globally for reporting purposes if any - // composing model has it enabled, so don't overwrite it if already set - if (cache_itr != config.MemberEnd() && !response_cache_enabled_) { - response_cache_enabled_ = cache_itr->value["enable"].GetBool(); - } - - if (cache_itr != config.MemberEnd()) { - top_level_response_caching_enabled_ = cache_itr->value["enable"].GetBool(); - } - - return cb::Error::Success; -} - -cb::Error -ModelParser::InitTFServe( - const rapidjson::Document& metadata, const std::string& model_name, - const std::string& model_version, const std::string& model_signature_name, - const int32_t batch_size, - const std::unordered_map>& input_shapes, - std::unique_ptr& backend) -{ - model_name_ = model_name; - model_version_ = model_version; - model_signature_name_ = model_signature_name; - // Get the scheduler type for the model - scheduler_type_ = NONE; - - // Will use the user provided batch size as max. Relies on the service - // to throw an error if not supported. 
- max_batch_size_ = batch_size; - - const rapidjson::Value& signature_config = - metadata["metadata"]["signature_def"]["signature_def"]; - if (!signature_config.HasMember(model_signature_name.c_str())) { - return cb::Error( - "Failed to find signature_name \"" + model_signature_name + - "\" in the metadata", - pa::GENERIC_ERROR); - } - - // Get the information about inputs from metadata - if (signature_config[model_signature_name.c_str()].HasMember("inputs")) { - const rapidjson::Value& inputs = - signature_config[model_signature_name.c_str()]["inputs"]; - for (rapidjson::Value::ConstMemberIterator json_itr = inputs.MemberBegin(); - json_itr != inputs.MemberEnd(); ++json_itr) { - auto it = - inputs_->emplace(json_itr->name.GetString(), ModelTensor()).first; - it->second.name_ = json_itr->name.GetString(); - RETURN_IF_ERROR(ConvertDTypeFromTFS( - json_itr->value["dtype"].GetString(), &it->second.datatype_)); - - bool is_dynamic = false; - if (json_itr->value["tensor_shape"]["unknown_rank"].GetBool()) { - if (max_batch_size_ != 0) { - return cb::Error( - "Can not specify -b flag for saved model with unknown ranked " - "inputs", - pa::GENERIC_ERROR); - } - is_dynamic = true; - } else { - bool first_dim = true; - for (const auto& dim : - json_itr->value["tensor_shape"]["dim"].GetArray()) { - int64_t dim_int; - RETURN_IF_ERROR(GetInt(dim["size"], &dim_int)); - if (first_dim && (max_batch_size_ != 0)) { - if (dim_int != -1) { - return cb::Error( - "Can not specify -b flag for saved model with input not " - "having their first dim as -1", - pa::GENERIC_ERROR); - } - first_dim = false; - } else { - if (dim_int == -1) { - is_dynamic = true; - } - it->second.shape_.push_back(dim_int); - } - } - } - - if (is_dynamic) { - const auto user_shape_it = input_shapes.find(it->second.name_); - if (user_shape_it != input_shapes.end()) { - // Update the default shape to be used. - it->second.shape_.clear(); - for (const auto dim : user_shape_it->second) { - it->second.shape_.push_back(dim); - } - } - } - } - } - - // Will not extract the information about the information about the outputs. - // As by default, the TensorFlow serving will return all the output tensors - // if none are requested. - // See here - // https://github.com/tensorflow/serving/blob/2.3.0/tensorflow_serving/apis/predict.proto#L27 - - return cb::Error::Success; -} - -cb::Error -ModelParser::InitOpenAI( - const std::string& model_name, const std::string& model_version, - const int32_t batch_size) -{ - // OpenAI does not return model metadata hence we can not obtain any - // parameters. - model_name_ = model_name; - model_version_ = model_version; - max_batch_size_ = batch_size; - - // OpenAI will take a single json input with a fully formed payload - auto in_it = inputs_->emplace("payload", ModelTensor()).first; - in_it->second.name_ = "payload"; - in_it->second.datatype_ = "JSON"; - in_it->second.shape_.push_back(1); - - // OpenAI will reply with a single json output - auto out_it = outputs_->emplace("response", ModelTensor()).first; - out_it->second.name_ = "response"; - out_it->second.datatype_ = "JSON"; - out_it->second.shape_.push_back(1); - - return cb::Error::Success; -} - -cb::Error -ModelParser::InitTorchServe( - const std::string& model_name, const std::string& model_version, - const int32_t batch_size) -{ - // TorchServe does not return model metadata hence we can not obtain any - // parameters. 
- model_name_ = model_name; - model_version_ = model_version; - max_batch_size_ = batch_size; - - // TorchServe needs to upload a file to the server. The input will hold the - // path to the file which should be provided as json to --input-data - auto it = inputs_->emplace("TORCHSERVE_INPUT", ModelTensor()).first; - it->second.name_ = "TORCHSERVE_INPUT"; - it->second.datatype_ = "BYTES"; - // Supports only a single input file - it->second.shape_.push_back(1); - - return cb::Error::Success; -} - -cb::Error -ModelParser::DetermineComposingModelMap( - const std::vector& bls_composing_models, - const rapidjson::Document& config, - std::unique_ptr& backend) -{ - RETURN_IF_ERROR(AddBLSComposingModels(bls_composing_models, config, backend)); - RETURN_IF_ERROR(AddEnsembleComposingModels(config, backend)); - - return cb::Error::Success; -} - -cb::Error -ModelParser::AddBLSComposingModels( - const std::vector& bls_composing_models, - const rapidjson::Document& config, - std::unique_ptr& backend) -{ - for (auto model : bls_composing_models) { - (*composing_models_map_)[config["name"].GetString()].insert(model); - - rapidjson::Document composing_model_config; - RETURN_IF_ERROR(backend->ModelConfig( - &composing_model_config, model.first, model.second)); - RETURN_IF_ERROR( - AddEnsembleComposingModels(composing_model_config, backend)); - } - - return cb::Error::Success; -} - -cb::Error -ModelParser::AddEnsembleComposingModels( - const rapidjson::Document& config, - std::unique_ptr& backend) -{ - if (config.HasMember("platform") && - std::string(config["platform"].GetString()).compare("ensemble") == 0) { - const auto step_itr = config["ensemble_scheduling"].FindMember("step"); - for (const auto& step : step_itr->value.GetArray()) { - std::string step_model_version; - int64_t model_version_int; - RETURN_IF_ERROR(GetInt(step["model_version"], &model_version_int)); - if (model_version_int == -1) { - step_model_version = ""; - } else { - step_model_version = std::to_string(model_version_int); - } - - (*composing_models_map_)[config["name"].GetString()].emplace( - std::string(step["model_name"].GetString()), step_model_version); - - rapidjson::Document composing_model_config; - RETURN_IF_ERROR(backend->ModelConfig( - &composing_model_config, step["model_name"].GetString(), - step_model_version)); - RETURN_IF_ERROR( - AddEnsembleComposingModels(composing_model_config, backend)); - } - } - - return cb::Error::Success; -} - - -cb::Error -ModelParser::DetermineSchedulerType( - const rapidjson::Document& config, - std::unique_ptr& backend) -{ - scheduler_type_ = NONE; - - if (composing_models_map_->size() != 0) { - bool is_sequential = false; - RETURN_IF_ERROR(GetComposingSchedulerType(backend, &is_sequential)); - if (is_sequential) { - scheduler_type_ = ENSEMBLE_SEQUENCE; - } else { - scheduler_type_ = ENSEMBLE; - } - } else { - const auto& sequence_itr = config.FindMember("sequence_batching"); - if (sequence_itr != config.MemberEnd()) { - scheduler_type_ = SEQUENCE; - } else { - const auto& dynamic_itr = config.FindMember("dynamic_batching"); - if (dynamic_itr != config.MemberEnd()) { - scheduler_type_ = DYNAMIC; - } - } - } - return cb::Error::Success; -} - -cb::Error -ModelParser::GetComposingSchedulerType( - std::unique_ptr& backend, bool* is_sequential) -{ - for (auto parent_composing_models : *composing_models_map_.get()) { - auto& composing_models = parent_composing_models.second; - for (auto composing_model : composing_models) { - rapidjson::Document config; - RETURN_IF_ERROR(backend->ModelConfig( - 
&config, composing_model.first, composing_model.second)); - - const auto& sequence_itr = config.FindMember("sequence_batching"); - if (sequence_itr != config.MemberEnd()) { - *is_sequential = true; - } - - const auto cache_itr = config.FindMember("response_cache"); - // response_cache_enabled_ set globally for reporting purposes if any - // composing model has it enabled, so don't overwrite it if already set - if (cache_itr != config.MemberEnd() && !response_cache_enabled_) { - response_cache_enabled_ = cache_itr->value["enable"].GetBool(); - } - } - } - return cb::Error::Success; -} - -cb::Error -ModelParser::GetInt(const rapidjson::Value& value, int64_t* integer_value) -{ - if (value.IsString()) { - std::string str(value.GetString(), value.GetStringLength()); - - try { - *integer_value = std::stoll(str.c_str()); - } - catch (...) { - return cb::Error( - std::string("unable to convert '") + str + "' to integer", - pa::GENERIC_ERROR); - } - - } else if (value.IsInt64()) { - *integer_value = value.GetInt64(); - } else if (value.IsInt()) { - *integer_value = value.GetInt(); - } else { - return cb::Error("failed to parse the integer value", pa::GENERIC_ERROR); - } - - return cb::Error::Success; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/model_parser.h b/src/c++/perf_analyzer/model_parser.h deleted file mode 100644 index ac76b3e22..000000000 --- a/src/c++/perf_analyzer/model_parser.h +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
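The GetInt helper that closes model_parser.cc above exists because protobuf's JSON mapping serializes int64/uint64 fields as quoted strings, so a field such as max_batch_size can arrive either as 8 or as "8". A standalone sketch of that dual-format parsing with rapidjson; the function and field names here are illustrative, not the deleted implementation itself:

#include <cstdint>
#include <iostream>
#include <string>

#include "rapidjson/document.h"

// Accept both a JSON number and a protobuf-style quoted int64.
bool GetInt(const rapidjson::Value& value, int64_t* out)
{
  if (value.IsString()) {
    try {
      *out = std::stoll(std::string(value.GetString(), value.GetStringLength()));
      return true;
    }
    catch (...) {
      return false;  // not a parsable integer string
    }
  }
  if (value.IsInt64()) {
    *out = value.GetInt64();
    return true;
  }
  return false;
}

int main()
{
  rapidjson::Document doc;
  doc.Parse(R"({"as_string": "9223372036854775807", "as_number": 8})");
  int64_t v{0};
  GetInt(doc["as_string"], &v);
  std::cout << v << std::endl;  // 9223372036854775807
  GetInt(doc["as_number"], &v);
  std::cout << v << std::endl;  // 8
  return 0;
}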
-#pragma once - -#include - -#include "client_backend/client_backend.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestModelParser; -class MockModelParser; -class InferenceProfiler; -#endif - -struct ModelTensor { - ModelTensor() : is_shape_tensor_(false) {} - std::string name_; - std::string datatype_; - std::vector shape_; - // Indicates if this tensor holds shape information for other tensors - bool is_shape_tensor_; - bool is_optional_; -}; - -using ModelTensorMap = std::map; -using ComposingModelMap = std::map>; - -//============================================================================== -/// ModelParser is a helper class to parse the information about the target -/// model from the metadata and configuration returned by the server. -/// -/// Perf Analyzer depends upon the various properties of the model to correctly -/// generate and issue inference request for the model. The object of this -/// class will provide these necessary details. -class ModelParser { - public: - enum ModelSchedulerType { - NONE, - DYNAMIC, - SEQUENCE, - ENSEMBLE, - ENSEMBLE_SEQUENCE - }; - - explicit ModelParser(cb::BackendKind backend_kind) - : backend_kind_(backend_kind), - inputs_(std::make_shared()), - outputs_(std::make_shared()), - composing_models_map_(std::make_shared()), - scheduler_type_(NONE), max_batch_size_(0), is_decoupled_(false), - response_cache_enabled_(false), - top_level_response_caching_enabled_(false) - { - } - - /// Initializes the ModelParser with the metadata and config rapidjson DOM - /// for the target model obtained from Triton service - /// \param metadata The metadata of the target model. - /// \param config The config of the target model. - /// \param model_version The version of target model. - /// \param bls_composing_models A list of BLS composing model identifiers - /// \param input_shapes The user provided default shapes which will be use - /// if a certain input has wildcard in its dimension. - /// \param backend The backend object. - /// \return cb::Error object indicating success or failure. - cb::Error InitTriton( - const rapidjson::Document& metadata, const rapidjson::Document& config, - const std::string& model_version, - const std::vector& bls_composing_models, - const std::unordered_map>& input_shapes, - std::unique_ptr& backend); - - /// Initializes the ModelParser with the metadata and config rapidjson DOM - /// for the target model obtained from TF serving service. - /// \param metadata The metadata of the target model. - /// \param model_name The name of target model. - /// \param model_version The version of target model. - /// \param model_signature_name The signature name of target model. - /// \param input_shapes The user provided default shapes which will be use - /// if a certain input has wildcard in its dimension. - /// \param backend The backend object. - /// \return cb::Error object indicating success or failure. 
- cb::Error InitTFServe( - const rapidjson::Document& metadata, const std::string& model_name, - const std::string& model_version, const std::string& model_signature_name, - const int32_t batch_size, - const std::unordered_map>& input_shapes, - std::unique_ptr& backend); - - cb::Error InitOpenAI( - const std::string& model_name, const std::string& model_version, - const int32_t batch_size); - - cb::Error InitTorchServe( - const std::string& model_name, const std::string& model_version, - const int32_t batch_size); - - /// Get the name of the target model - /// \return Model name as string - const std::string& ModelName() const { return model_name_; } - - /// Get the version of target model - /// \return Model version as string - const std::string& ModelVersion() const { return model_version_; } - - /// Get the signature name of target model - /// \return Model signature name as string - const std::string& ModelSignatureName() const - { - return model_signature_name_; - } - - /// Get the scheduler type for the model - ModelSchedulerType SchedulerType() const { return scheduler_type_; } - - /// Get the max batch size supported by the model. Returns 0 if the model - /// does not support batching. - /// \return The maximum supported batch size. - size_t MaxBatchSize() const { return max_batch_size_; } - - /// Returns whether or not the model is decoupled - /// \return the truth value of whether the model is decoupled - bool IsDecoupled() const { return is_decoupled_; } - - /// Returns whether or not response cache is enabled for this model - /// \return the truth value of whether response cache is enabled for this - /// model - bool ResponseCacheEnabled() const { return response_cache_enabled_; } - - /// Returns whether or not top level request caching is enabled for this model - /// \return the truth value of whether top level request caching is enabled - /// for this model - bool TopLevelResponseCachingEnabled() const - { - return top_level_response_caching_enabled_; - } - -/// Only for testing -#ifndef DOCTEST_CONFIG_DISABLE - void SetTopLevelResponseCaching(bool enable_top_level_response_caching) - { - top_level_response_caching_enabled_ = enable_top_level_response_caching; - } -#endif - - /// Get the details about the model inputs. - /// \return The map with tensor_name and the tensor details - /// stored as key-value pair. - const std::shared_ptr& Inputs() { return inputs_; } - - /// Get the details about the model outputs. - /// \return The map with tensor_name and the tensor details - /// stored as key-value pair. - const std::shared_ptr& Outputs() { return outputs_; } - - /// Get the composing maps for the target model. - /// \return The pointer to the nested map describing the - /// nested flow in the target model. 
- const std::shared_ptr& GetComposingModelMap() - { - return composing_models_map_; - } - - - protected: - ModelSchedulerType scheduler_type_; - bool is_decoupled_; - - private: - /// Populate composing_models_map_ based on any bls composing models passed in - /// via the CLI as well as any ensemble or nested ensemble models - cb::Error DetermineComposingModelMap( - const std::vector& bls_composing_models, - const rapidjson::Document& config, - std::unique_ptr& backend); - - cb::Error AddBLSComposingModels( - const std::vector& bls_composing_models, - const rapidjson::Document& config, - std::unique_ptr& backend); - - cb::Error AddEnsembleComposingModels( - const rapidjson::Document& config, - std::unique_ptr& backend); - - /// Populate scheduler_type_ based on the scheduler type of the parent model - /// as well as any composing models - cb::Error DetermineSchedulerType( - const rapidjson::Document& config, - std::unique_ptr& backend); - - /// Sets is_sequential to true if any of the composing models are sequential - cb::Error GetComposingSchedulerType( - std::unique_ptr& backend, bool* is_sequential); - - /// In the json produced by protobuf, int64 and uint64 values are - /// represented as strings. Protobuf doesn't provide an option to - /// disable this (sigh) so we need to correctly parse these fields - /// for ModelParser to receive appropriate requests. - /// \param value The rapidjson value object with the int value. - /// \param integer_value The output integer pointer. - /// \return cb::Error object indicating success or failure. - cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value); - - cb::BackendKind backend_kind_; - - std::shared_ptr inputs_; - std::shared_ptr outputs_; - std::shared_ptr composing_models_map_; - - std::string model_name_; - std::string model_version_; - std::string model_signature_name_; - size_t max_batch_size_; - bool response_cache_enabled_; - bool top_level_response_caching_enabled_; - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestModelParser; - friend MockModelParser; - friend InferenceProfiler; - - public: - ModelParser() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mpi_utils.cc b/src/c++/perf_analyzer/mpi_utils.cc deleted file mode 100644 index 2923f6552..000000000 --- a/src/c++/perf_analyzer/mpi_utils.cc +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "mpi_utils.h" - -#include - -#include -#include - -namespace triton { namespace perfanalyzer { - -MPIDriver::MPIDriver(bool is_enabled) : is_enabled_(is_enabled) -{ - if (is_enabled_ == false) { - return; - } - - handle_ = dlopen("libmpi.so", RTLD_LAZY | RTLD_GLOBAL); - - if (handle_ == nullptr) { - throw std::runtime_error( - "Unable to load MPI library. If you are trying to run with " - "MPI / multiple models, check that 'libmpi.so' is on " - "`LD_LIBRARY_PATH` environment variable path."); - } - - CheckMPIImpl(); -} - -bool -MPIDriver::IsMPIRun() -{ - if (is_enabled_ == false) { - return false; - } - - if (MPIInitialized() == false) { - throw std::runtime_error("Must call MPI_Init() before calling IsMPIRun()."); - } - - return MPICommSizeWorld() > 1; -} - -void -MPIDriver::MPIInit(int* argc, char*** argv) -{ - if (is_enabled_ == false) { - return; - } - - int (*MPI_Init)( - int*, char***){(int (*)(int*, char***))dlsym(handle_, "MPI_Init")}; - if (MPI_Init == nullptr) { - throw std::runtime_error("Unable to obtain address of `MPI_Init` symbol."); - } - - MPI_Init(argc, argv); -} - -int -MPIDriver::MPICommSizeWorld() -{ - if (is_enabled_ == false) { - return -1; - } - - int world_size{1}; - - int (*MPI_Comm_size)( - void*, int*){(int (*)(void*, int*))dlsym(handle_, "MPI_Comm_size")}; - if (MPI_Comm_size == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `MPI_Comm_size` symbol."); - } - - MPI_Comm_size(MPICommWorld(), &world_size); - - return world_size; -} - -void -MPIDriver::MPIBarrierWorld() -{ - if (is_enabled_ == false) { - return; - } - - int (*MPI_Barrier)(void*){(int (*)(void*))dlsym(handle_, "MPI_Barrier")}; - if (MPI_Barrier == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `MPI_Barrier` symbol."); - } - - MPI_Barrier(MPICommWorld()); -} - -int -MPIDriver::MPICommRankWorld() -{ - if (is_enabled_ == false) { - return -1; - } - - int rank{0}; - - int (*MPI_Comm_rank)( - void*, int*){(int (*)(void*, int*))dlsym(handle_, "MPI_Comm_rank")}; - if (MPI_Comm_rank == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `MPI_Comm_rank` symbol."); - } - - MPI_Comm_rank(MPICommWorld(), &rank); - - return rank; -} - -void -MPIDriver::MPIBcastIntWorld(void* buffer, int count, int root) -{ - if (is_enabled_ == false) { - return; - } - - int (*MPI_Bcast)(void*, int, void*, int, void*){ - (int (*)(void*, int, void*, int, void*))dlsym(handle_, "MPI_Bcast")}; - if (MPI_Bcast == nullptr) { - throw std::runtime_error("Unable to obtain address of `MPI_Bcast` symbol."); - } - - MPI_Bcast(buffer, count, MPIInt(), root, MPICommWorld()); -} - -void -MPIDriver::MPIFinalize() -{ - if (is_enabled_ == false) { - return; - } - - int (*MPI_Finalize)(){(int (*)())dlsym(handle_, "MPI_Finalize")}; - if (MPI_Finalize == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `MPI_Finalize` symbol."); - } - - MPI_Finalize(); -} - -bool -MPIDriver::MPIInitialized() -{ - if (is_enabled_ == false) { - 
return false; - } - - int (*MPI_Initialized)(int*){ - (int (*)(int*))dlsym(handle_, "MPI_Initialized")}; - if (MPI_Initialized == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `MPI_Initialized` symbol."); - } - - int initialized{0}; - MPI_Initialized(&initialized); - return initialized != 0; -} - -void* -MPIDriver::MPICommWorld() -{ - if (is_enabled_ == false) { - return nullptr; - } - - void* MPI_COMM_WORLD{dlsym(handle_, "ompi_mpi_comm_world")}; - if (MPI_COMM_WORLD == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `ompi_mpi_comm_world` symbol."); - } - - return MPI_COMM_WORLD; -} - -void* -MPIDriver::MPIInt() -{ - if (is_enabled_ == false) { - return nullptr; - } - - void* MPI_INT{dlsym(handle_, "ompi_mpi_int")}; - if (MPI_INT == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `ompi_mpi_int` symbol."); - } - - return MPI_INT; -} - -void -MPIDriver::CheckMPIImpl() -{ - if (is_enabled_ == false) { - return; - } - - int (*MPI_Get_library_version)(char*, int*){ - (int (*)(char*, int*))dlsym(handle_, "MPI_Get_library_version")}; - if (MPI_Get_library_version == nullptr) { - throw std::runtime_error( - "Unable to obtain address of `MPI_Get_library_version` symbol."); - } - - std::string version; - version.resize(MPIVersionStringMaximumLength); - int resultlen{0}; - MPI_Get_library_version(&version[0], &resultlen); - - if (version.find("Open MPI") != 0) { - throw std::runtime_error( - "Perf Analyzer only supports Open MPI. Please uninstall your current " - "implementation of MPI and install Open MPI."); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/mpi_utils.h b/src/c++/perf_analyzer/mpi_utils.h deleted file mode 100644 index 862c8a3c3..000000000 --- a/src/c++/perf_analyzer/mpi_utils.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -namespace triton { namespace perfanalyzer { - -class MPIDriver { - public: - // Initializes class. 
Saves handle to MPI library if MPI library is available. - MPIDriver(bool is_enabled = false); - - // Returns true if the current process is an MPI process with world size - // greater than 1. - bool IsMPIRun(); - - // Attempts to call MPI_Init API. - void MPIInit(int* argc, char*** argv); - - // Attempts to call MPI_Comm_size API with MPI_COMM_WORLD communicator. - int MPICommSizeWorld(); - - // Attempts to call MPI_Barrier API with MPI_COMM_WORLD communicator. - void MPIBarrierWorld(); - - // Attempts to call MPI_Comm_rank API with MPI_COMM_WORLD communicator. - int MPICommRankWorld(); - - // Attempts to call MPI_Bcast API with MPI_INT data type and MPI_COMM_WORLD - // communicator. - void MPIBcastIntWorld(void* buffer, int count, int root); - - // Attempts to call MPI_Finalize API. - void MPIFinalize(); - - private: - // Attempts to call MPI_Initialized API. - bool MPIInitialized(); - - // Returns MPI_COMM_WORLD symbol address if MPI library is available, - // otherwise `nullptr`. - void* MPICommWorld(); - - // Returns MPI_INT symbol address if MPI library is available, otherwise - // `nullptr`. - void* MPIInt(); - - // Attempts to check that Open MPI is installed. - void CheckMPIImpl(); - - // Bool for whether user has opted to attempt to use MPI functionality. - bool is_enabled_{false}; - - // Loaded object for MPI library. - void* handle_{nullptr}; - - // Maximum string length for MPI version string. - const uint64_t MPIVersionStringMaximumLength{32768}; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc deleted file mode 100644 index c10101e1c..000000000 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ /dev/null @@ -1,473 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
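The MPIDriver removed above has no link-time dependency on MPI: it dlopen()s libmpi.so only when MPI support is enabled and resolves every MPI symbol with dlsym() at call time, which is why perf_analyzer still runs on machines without MPI installed. A minimal sketch of that pattern, showing only MPI_Initialized rather than the full removed class:

```cpp
#include <dlfcn.h>

#include <stdexcept>

// Minimal sketch of the lazy-loading pattern used by the removed MPIDriver:
// open libmpi.so only when MPI support is requested and resolve each MPI
// symbol on demand, so the binary has no link-time dependency on MPI.
class LazyMpi {
 public:
  LazyMpi()
  {
    handle_ = dlopen("libmpi.so", RTLD_LAZY | RTLD_GLOBAL);
    if (handle_ == nullptr) {
      throw std::runtime_error("libmpi.so not found on LD_LIBRARY_PATH");
    }
  }

  // MPI_Initialized(int*) is part of the MPI standard; resolved at call time.
  bool Initialized()
  {
    auto fn =
        reinterpret_cast<int (*)(int*)>(dlsym(handle_, "MPI_Initialized"));
    if (fn == nullptr) {
      throw std::runtime_error("MPI_Initialized symbol not found");
    }
    int flag = 0;
    fn(&flag);
    return flag != 0;
  }

 private:
  void* handle_{nullptr};
};
```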
- -#include "perf_analyzer.h" - -#include "perf_analyzer_exception.h" -#include "periodic_concurrency_manager.h" -#include "report_writer.h" -#include "request_rate_manager.h" - -namespace pa = triton::perfanalyzer; - -namespace triton { namespace perfanalyzer { - -volatile bool early_exit = false; - -void -SignalHandler(int signum) -{ - std::cout << "Interrupt signal (" << signum << ") received." << std::endl; - // Upon invoking the SignalHandler for the first time early_exit flag is - // invoked and analyzer waits for in-flight inferences to complete before - // exiting. On the second invocation, the program exits immediately. - if (!early_exit) { - std::cout << "Waiting for in-flight inferences to complete." << std::endl; - early_exit = true; - } else { - std::cout << "Exiting immediately..." << std::endl; - exit(0); - } -} -}} // namespace triton::perfanalyzer - -PerfAnalyzer::PerfAnalyzer(pa::PAParamsPtr params) : params_(params) -{ - CreateAnalyzerObjects(); -} - -void -PerfAnalyzer::Run() -{ - PrerunReport(); - Profile(); - WriteReport(); - GenerateProfileExport(); - Finalize(); -} - -void -PerfAnalyzer::CreateAnalyzerObjects() -{ - // trap SIGINT to allow threads to exit gracefully - signal(SIGINT, pa::SignalHandler); - std::shared_ptr factory; - FAIL_IF_ERR( - cb::ClientBackendFactory::Create( - params_->kind, params_->url, params_->endpoint, params_->protocol, - params_->ssl_options, params_->trace_options, - params_->compression_algorithm, params_->http_headers, - params_->triton_server_path, params_->model_repository_path, - params_->extra_verbose, params_->metrics_url, - params_->input_tensor_format, params_->output_tensor_format, - &factory), - "failed to create client factory"); - - FAIL_IF_ERR( - factory->CreateClientBackend(&backend_), - "failed to create triton client backend"); - - parser_ = std::make_shared(params_->kind); - if (params_->kind == cb::BackendKind::TRITON || - params_->kind == cb::BackendKind::TRITON_C_API) { - rapidjson::Document model_metadata; - FAIL_IF_ERR( - backend_->ModelMetadata( - &model_metadata, params_->model_name, params_->model_version), - "failed to get model metadata"); - rapidjson::Document model_config; - FAIL_IF_ERR( - backend_->ModelConfig( - &model_config, params_->model_name, params_->model_version), - "failed to get model config"); - - FAIL_IF_ERR( - parser_->InitTriton( - model_metadata, model_config, params_->model_version, - params_->bls_composing_models, params_->input_shapes, backend_), - "failed to create model parser"); - } else if (params_->kind == cb::BackendKind::OPENAI) { - FAIL_IF_ERR( - parser_->InitOpenAI( - params_->model_name, params_->model_version, params_->batch_size), - "failed to create model parser"); - } else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) { - rapidjson::Document model_metadata; - FAIL_IF_ERR( - backend_->ModelMetadata( - &model_metadata, params_->model_name, params_->model_version), - "failed to get model metadata"); - FAIL_IF_ERR( - parser_->InitTFServe( - model_metadata, params_->model_name, params_->model_version, - params_->model_signature_name, params_->batch_size, - params_->input_shapes, backend_), - "failed to create model parser"); - } else if (params_->kind == cb::BackendKind::TORCHSERVE) { - FAIL_IF_ERR( - parser_->InitTorchServe( - params_->model_name, params_->model_version, params_->batch_size), - "failed to create model parser"); - } else { - std::cerr << "unsupported client backend kind" << std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - - if 
((parser_->MaxBatchSize() == 0) && params_->batch_size > 1) { - std::cerr << "can not specify batch size > 1 as the model does not support " - "batching" - << std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - - // Change the default value for the --async option for sequential models - if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) || - (parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) { - if (!params_->async) { - params_->async = params_->forced_sync ? false : true; - } - // Validate the batch_size specification - if (params_->batch_size > 1) { - std::cerr << "can not specify batch size > 1 when using a sequence model" - << std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - } - - if (params_->streaming) { - if (params_->forced_sync) { - std::cerr << "can not use streaming with synchronous API" << std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - params_->async = true; - } - - std::unique_ptr manager; - if (params_->targeting_concurrency()) { - if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) || - (parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) { - if (params_->concurrency_range.end == pa::NO_LIMIT && params_->async) { - std::cerr << "The 'end' concurrency can not be 0 for sequence " - "models when using asynchronous API." - << std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - } - params_->max_concurrency = std::max( - params_->concurrency_range.start, params_->concurrency_range.end); - - if (!params_->async) { - if (params_->concurrency_range.end == pa::NO_LIMIT) { - std::cerr - << "WARNING: The maximum attainable concurrency will be limited by " - "max_threads specification." - << std::endl; - params_->concurrency_range.end = params_->max_threads; - } else { - // As only one synchronous request can be generated from a thread at a - // time, to maintain the requested concurrency, that many threads need - // to be generated. - if (params_->max_threads_specified) { - std::cerr - << "WARNING: Overriding max_threads specification to ensure " - "requested concurrency range." - << std::endl; - } - params_->max_threads = std::max( - params_->concurrency_range.start, params_->concurrency_range.end); - } - } - if ((params_->sequence_id_range != 0) && - (params_->sequence_id_range < params_->max_concurrency)) { - std::cerr << "sequence id range specified is smaller than the " - << "maximum possible concurrency, sequence id collision may " - << "occur." 
<< std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - FAIL_IF_ERR( - pa::ConcurrencyManager::Create( - params_->async, params_->streaming, params_->batch_size, - params_->max_threads, params_->max_concurrency, - params_->shared_memory_type, params_->output_shm_size, parser_, - factory, &manager, params_->request_parameters), - "failed to create concurrency manager"); - - } else if (params_->is_using_periodic_concurrency_mode) { - manager = std::make_unique( - params_->async, params_->streaming, params_->batch_size, - params_->max_threads, params_->max_concurrency, - params_->shared_memory_type, params_->output_shm_size, parser_, factory, - params_->periodic_concurrency_range, params_->request_period, - params_->request_parameters); - } else if (params_->using_request_rate_range) { - if ((params_->sequence_id_range != 0) && - (params_->sequence_id_range < params_->num_of_sequences)) { - std::cerr - << "sequence id range specified is smaller than the " - << "maximum possible number of sequences, sequence id collision " - << "may occur." << std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - FAIL_IF_ERR( - pa::RequestRateManager::Create( - params_->async, params_->streaming, params_->measurement_window_ms, - params_->max_trials, params_->request_distribution, - params_->batch_size, params_->max_threads, - params_->num_of_sequences, params_->shared_memory_type, - params_->output_shm_size, params_->serial_sequences, parser_, - factory, &manager, params_->request_parameters), - "failed to create request rate manager"); - - } else { - if ((params_->sequence_id_range != 0) && - (params_->sequence_id_range < params_->num_of_sequences)) { - std::cerr - << "sequence id range specified is smaller than the " - << "maximum possible number of sequences, sequence id collision " - << "may occur." 
<< std::endl; - throw pa::PerfAnalyzerException(pa::GENERIC_ERROR); - } - FAIL_IF_ERR( - pa::CustomLoadManager::Create( - params_->async, params_->streaming, params_->measurement_window_ms, - params_->max_trials, params_->request_intervals_file, - params_->batch_size, params_->max_threads, - params_->num_of_sequences, params_->shared_memory_type, - params_->output_shm_size, params_->serial_sequences, parser_, - factory, &manager, params_->request_parameters), - "failed to create custom load manager"); - } - - manager->InitManager( - params_->string_length, params_->string_data, params_->zero_input, - params_->user_data, params_->start_sequence_id, - params_->sequence_id_range, params_->sequence_length, - params_->sequence_length_specified, params_->sequence_length_variation); - - FAIL_IF_ERR( - pa::ProfileDataCollector::Create(&collector_), - "failed to create profile data collector"); - - FAIL_IF_ERR( - pa::ProfileDataExporter::Create(&exporter_), - "failed to create profile data exporter"); - - FAIL_IF_ERR( - pa::InferenceProfiler::Create( - params_->verbose, params_->stability_threshold, - params_->measurement_window_ms, params_->max_trials, - params_->percentile, params_->latency_threshold_ms, params_->protocol, - parser_, std::move(backend_), std::move(manager), &profiler_, - params_->measurement_request_count, params_->measurement_mode, - params_->mpi_driver, params_->metrics_interval_ms, - params_->should_collect_metrics, params_->overhead_pct_threshold, - params_->async, collector_, !params_->profile_export_file.empty()), - "failed to create profiler"); -} - -void -PerfAnalyzer::PrerunReport() -{ - std::cout << "*** Measurement Settings ***" << std::endl; - if (params_->kind == cb::BackendKind::TRITON || params_->using_batch_size) { - std::cout << " Batch size: " << params_->batch_size << std::endl; - } - - std::cout << " Service Kind: " << BackendKindToString(params_->kind) - << std::endl; - - if (params_->request_count != 0) { - std::cout << " Sending a total of " << params_->request_count - << " requests" << std::endl; - } else { - if (params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) { - std::cout << " Using \"count_windows\" mode for stabilization" - << std::endl; - } else { - std::cout << " Using \"time_windows\" mode for stabilization" - << std::endl; - } - - std::string stabilization_metric = "latency and throughput"; - if (params_->async) { - stabilization_metric = "throughput"; - } - if (params_->percentile == -1) { - std::cout << " Stabilizing using average " << stabilization_metric - << std::endl; - } else { - std::cout << " Stabilizing using p" << params_->percentile - << stabilization_metric << std::endl; - } - - if (params_->measurement_mode == pa::MeasurementMode::TIME_WINDOWS) { - std::cout << " Measurement window: " << params_->measurement_window_ms - << " msec" << std::endl; - } else if ( - params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) { - std::cout << " Minimum number of samples in each window: " - << params_->measurement_request_count << std::endl; - } - } - - if (params_->concurrency_range.end != 1) { - std::cout << " Latency limit: " << params_->latency_threshold_ms << " msec" - << std::endl; - if (params_->concurrency_range.end != pa::NO_LIMIT) { - std::cout << " Concurrency limit: " - << std::max( - params_->concurrency_range.start, - params_->concurrency_range.end) - << " concurrent requests" << std::endl; - } - } - if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] != 1.0) { - std::cout << " Latency limit: " << 
params_->latency_threshold_ms << " msec" - << std::endl; - if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] != - static_cast(pa::NO_LIMIT)) { - std::cout << " Request Rate limit: " - << std::max( - params_->request_rate_range[pa::SEARCH_RANGE::kSTART], - params_->request_rate_range[pa::SEARCH_RANGE::kEND]) - << " requests per seconds" << std::endl; - } - } - if (params_->using_request_rate_range) { - if (params_->request_distribution == pa::Distribution::POISSON) { - std::cout << " Using poisson distribution on request generation" - << std::endl; - } else { - std::cout << " Using uniform distribution on request generation" - << std::endl; - } - } - if (params_->search_mode == pa::SearchMode::BINARY) { - std::cout << " Using Binary Search algorithm" << std::endl; - } - if (params_->async) { - std::cout << " Using asynchronous calls for inference" << std::endl; - } else { - std::cout << " Using synchronous calls for inference" << std::endl; - } - if (parser_->IsDecoupled()) { - std::cout << " Detected decoupled model, using the first response for " - "measuring latency" - << std::endl; - } - - std::cout << std::endl; -} - -void -PerfAnalyzer::Profile() -{ - params_->mpi_driver->MPIBarrierWorld(); - - cb::Error err; - if (params_->targeting_concurrency()) { - err = profiler_->Profile( - params_->concurrency_range.start, params_->concurrency_range.end, - params_->concurrency_range.step, params_->search_mode, - params_->request_count, perf_statuses_); - } else if (params_->is_using_periodic_concurrency_mode) { - err = profiler_->ProfilePeriodicConcurrencyMode(); - } else { - err = profiler_->Profile( - params_->request_rate_range[pa::SEARCH_RANGE::kSTART], - params_->request_rate_range[pa::SEARCH_RANGE::kEND], - params_->request_rate_range[pa::SEARCH_RANGE::kSTEP], - params_->search_mode, params_->request_count, perf_statuses_); - } - - params_->mpi_driver->MPIBarrierWorld(); - - if (!err.IsOk()) { - std::cerr << err; - // In the case of early_exit, the thread does not return and continues to - // report the summary - if (!pa::early_exit) { - throw pa::PerfAnalyzerException(err.Err()); - } - } -} - -void -PerfAnalyzer::WriteReport() -{ - if (!perf_statuses_.size() || params_->is_using_periodic_concurrency_mode) { - return; - } - - // Can print more depending on verbose, but it seems too much information - std::cout << "Inferences/Second vs. 
Client "; - if (params_->percentile == -1) { - std::cout << "Average Batch Latency" << std::endl; - } else { - std::cout << "p" << params_->percentile << " Batch Latency" << std::endl; - } - - for (pa::PerfStatus& status : perf_statuses_) { - if (params_->targeting_concurrency()) { - std::cout << "Concurrency: " << status.concurrency << ", "; - } else { - std::cout << "Request Rate: " << status.request_rate << ", "; - } - std::cout << "throughput: " << status.client_stats.infer_per_sec - << " infer/sec, latency " - << (status.stabilizing_latency_ns / 1000) << " usec" << std::endl; - } - - bool should_output_metrics{ - params_->should_collect_metrics && params_->verbose_csv}; - - std::unique_ptr writer; - - FAIL_IF_ERR( - pa::ReportWriter::Create( - params_->filename, params_->targeting_concurrency(), perf_statuses_, - params_->verbose_csv, profiler_->IncludeServerStats(), - params_->percentile, parser_, &writer, should_output_metrics), - "failed to create report writer"); - - writer->GenerateReport(); -} - -void -PerfAnalyzer::GenerateProfileExport() -{ - if (!params_->profile_export_file.empty()) { - exporter_->Export( - collector_->GetData(), collector_->GetVersion(), - params_->profile_export_file, params_->kind, params_->endpoint); - } -} - -void -PerfAnalyzer::Finalize() -{ - params_->mpi_driver->MPIFinalize(); -} diff --git a/src/c++/perf_analyzer/perf_analyzer.h b/src/c++/perf_analyzer/perf_analyzer.h deleted file mode 100644 index b75fe35f0..000000000 --- a/src/c++/perf_analyzer/perf_analyzer.h +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
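The SignalHandler deleted from perf_analyzer.cc implements a two-stage interrupt: the first SIGINT only raises the early_exit flag so in-flight inferences can drain and the summary can still be reported, while a second SIGINT exits immediately. A standalone sketch of that behaviour with a placeholder worker loop:

```cpp
#include <csignal>
#include <cstdlib>
#include <iostream>

// Two-stage Ctrl+C: the first SIGINT only sets a flag so the worker loop can
// drain in-flight work; a second SIGINT terminates immediately.
volatile std::sig_atomic_t early_exit = 0;

void
HandleSigint(int)
{
  if (!early_exit) {
    early_exit = 1;  // workers poll this flag and finish their current request
  } else {
    std::_Exit(0);  // async-signal-safe immediate exit
  }
}

int
main()
{
  std::signal(SIGINT, HandleSigint);
  while (!early_exit) {
    // ... send one inference request and record its latency (placeholder) ...
  }
  std::cout << "drained in-flight work; reporting partial results" << std::endl;
  return 0;
}
```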
-#pragma once - -#include -#include - -#include - -#include "command_line_parser.h" -#include "concurrency_manager.h" -#include "custom_load_manager.h" -#include "inference_profiler.h" -#include "model_parser.h" -#include "mpi_utils.h" -#include "perf_utils.h" -#include "profile_data_collector.h" -#include "profile_data_exporter.h" - -// Perf Analyzer provides various metrics to measure the performance of -// the inference server. It can either be used to measure the throughput, -// latency and time distribution under specific setting (i.e. fixed batch size -// and fixed concurrent requests), or be used to generate throughput-latency -// data point under dynamic setting (i.e. collecting throughput-latency data -// under different load level). -// -// The following data is collected and used as part of the metrics: -// - Throughput (infer/sec): -// The number of inference processed per second as seen by the analyzer. -// The number of inference is measured by the multiplication of the number -// of requests and their batch size. And the total time is the time elapsed -// from when the analyzer starts sending requests to when it received -// all responses. -// - Latency (usec): -// The average elapsed time between when a request is sent and -// when the response for the request is received. If 'percentile' flag is -// specified, the selected percentile value will be reported instead of -// average value. -// -// Perf Analyzer determines the stability of throughput and latency by observing -// measurements in different trials. If the latency and throughput, are within -// the stability percentage (see --stability-percentage option) Perf Analyzer -// will report the average of the throughput and latency numbers observed in the -// last three trials. All the measurements gathered during the last three trials -// is aggregated to generate a single report. The number of total requests is -// the sum of all the requests in the individual measurement windows. -// -// There are broadly three ways to load server for the data collection using -// perf_analyzer: -// - Maintaining Target Concurrency: -// In this setting, the analyzer will maintain a target number of concurrent -// requests sent to the server (see --concurrency-range option) while -// taking measurements. -// The number of requests will be the total number of requests sent within -// the time interval for measurement (see --measurement-interval option) and -// the latency will be the average latency across all requests. -// -// Besides throughput and latency, which is measured on client side, -// the following data measured by the server will also be reported -// in this setting: -// - Concurrent request: the number of concurrent requests as specified -// in --concurrency-range option. Note, for running perf analyzer for -// a single concurrency, user must specify --concurrency-range -// <'start'>, omitting 'end' and 'step' values. -// - Batch size: the batch size of each request as specified in -b option -// - Inference count: batch size * number of inference requests -// - Cumulative time: the total time between request received and -// response sent on the requests sent by perf analyzer. -// - Average Cumulative time: cumulative time / number of inference requests -// - Compute time: the total time it takes to run inferencing including time -// copying input tensors to GPU memory, time executing the model, -// and time copying output tensors from GPU memory for the requests -// sent by perf analyzer. 
-// - Average compute time: compute time / number of inference requests -// - Queue time: the total time it takes to wait for an available model -// instance for the requests sent by perf analyzer. -// - Average queue time: queue time / number of inference requests -// If all fields of --concurrency-range are specified, the analyzer will -// perform the following procedure: -// 1. Follows the procedure in fixed concurrent request mode using -// k concurrent requests (k starts at 'start'). -// 2. Gathers data reported from step 1. -// 3. Increases k by 'step' and repeats step 1 and 2 until latency from -// current iteration exceeds latency threshold (see --latency-threshold -// option) or concurrency level reaches 'end'. Note, by setting -// --latency-threshold or 'end' to 0 the effect of each threshold can -// be removed. However, both can not be 0 simultaneously. -// At each iteration, the data mentioned in fixed concurrent request mode -// will be reported. Besides that, after the procedure above, a collection -// of "throughput, latency, concurrent request count" tuples will be -// reported in increasing load level order. -// -// - Maintaining Target Request Rate: -// This mode is enabled only when --request-rate-range option is specified. -// Unlike above, here the analyzer will try to maintain a target rate of -// requests issued to the server while taking measurements. Rest of the -// behaviour of analyzer is identical as above. It is important to note that -// even though over a sufficiently large interval the rate of requests -// will tend to the target request rate, the actual request rate for a small -// time interval will depend upon the selected request distribution -// (--request-distribution). For 'constant' request distribution the time -// interval between successive requests is maintained to be constant, hence -// request rate is constant over time. However, 'poisson' request -// distribution varies the time interval between successive requests such -// that there are periods of bursts and nulls in request generation. -// Additionally, 'poisson' distribution mimics the real-world traffic and -// can be used to obtain measurements for a realistic-load. -// With each request-rate, the analyzer also reports the 'Delayed Request -// Count' which gives an idea of how many requests missed their schedule as -// specified by the distribution. Users can use --max-threads to increase -// the number of threads which might help in dispatching requests as per -// the schedule. Also note that a very large number of threads might be -// counter-productive with most of the time being spent on context-switching -// the threads. -// -// - Following User Provided Request Delivery Schedule: -// This mode is enabled only when --request-intervals option is specified. -// In this case, analyzer will try to dispatch the requests to the server -// with time intervals between successive requests specified in a user -// provided file. This file should contain time intervals in microseconds in -// each new line. Analyzer will loop around the values to produce a -// consistent load for measurements. Once, the readings are stabilized then -// the final statistics will be reported. The statistics will include -// 'Delayed Request Count' for the requests that missed their schedule. As -// described before, users can tune --max-threads to allow analyzer in -// keeping up with the schedule. 
This mode will help user in analyzing the -// performance of the server under different custom settings which may be of -// interest. -// -// By default, perf_analyzer will maintain target concurrency while measuring -// the performance. -// -// Options: -// -b: batch size for each request sent. -// --concurrency-range: The range of concurrency levels perf_analyzer will use. -// A concurrency level indicates the number of concurrent requests in queue. -// --request-rate-range: The range of request rates perf_analyzer will use to -// load the server. -// --request-intervals: File containing time intervals (in microseconds) to use -// between successive requests. -// --latency-threshold: latency threshold in msec. -// --measurement-interval: time interval for each measurement window in msec. -// --async: Enables Asynchronous inference calls. -// --binary-search: Enables binary search within the specified range. -// --request-distribution: Allows user to specify the distribution for selecting -// the time intervals between the request dispatch. -// -// For detail of the options not listed, please refer to the usage. -// -class PerfAnalyzer { - public: - PerfAnalyzer(pa::PAParamsPtr params); - virtual ~PerfAnalyzer(){}; - - // Main runner function for Perf Analyzer. - void Run(); - - private: - pa::PAParamsPtr params_; - std::unique_ptr profiler_; - std::unique_ptr backend_; - std::shared_ptr parser_; - std::vector perf_statuses_; - std::shared_ptr collector_; - std::shared_ptr exporter_; - - // - // Helper methods - // - - // Parse the options out of the command line argument - // - void CreateAnalyzerObjects(); - void PrerunReport(); - void Profile(); - void WriteReport(); - void GenerateProfileExport(); - void Finalize(); -}; diff --git a/src/c++/perf_analyzer/perf_analyzer_exception.h b/src/c++/perf_analyzer/perf_analyzer_exception.h deleted file mode 100644 index a0b8ae708..000000000 --- a/src/c++/perf_analyzer/perf_analyzer_exception.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -#pragma once - -#include -#include - -namespace triton { namespace perfanalyzer { - -// Perf Exception error class -// -class PerfAnalyzerException : public std::exception { - public: - PerfAnalyzerException(uint32_t error) : error_(error) {} - - PerfAnalyzerException(const std::string& message, uint32_t error) - : message_(message), error_(error) - { - } - - virtual const char* what() const throw() { return message_.c_str(); } - - inline int GetError() const { return error_; } - - private: - const std::string message_{""}; - uint32_t error_; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/perf_analyzer_unit_tests.cc b/src/c++/perf_analyzer/perf_analyzer_unit_tests.cc deleted file mode 100644 index bcc78fdd5..000000000 --- a/src/c++/perf_analyzer/perf_analyzer_unit_tests.cc +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// This file exists to hold a macro-expanded main function for the unit test -// runner executable. -// -// The old contents of main.cc are needed for the unit test runner to compile, -// but since two main functions cannot be compiled in the same executable, the -// contents of the old main.cc were moved to a new file/class, which are now -// included in the compilation of the unit test runner executable. 
-// -// The new contents of main.cc just include the new file/class mentioned above -// and run the primary function from there in a simplified main function, which -// runs Perf Analyzer. -#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN -#include "doctest.h" diff --git a/src/c++/perf_analyzer/perf_utils.cc b/src/c++/perf_analyzer/perf_utils.cc deleted file mode 100644 index 6088c1b6b..000000000 --- a/src/c++/perf_analyzer/perf_utils.cc +++ /dev/null @@ -1,416 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
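Taken together, the removed perf_analyzer.h and perf_analyzer.cc expose a small driver API: construct PerfAnalyzer from parsed command-line parameters and call Run(), which executes PrerunReport, Profile, WriteReport, GenerateProfileExport and Finalize in order, with PerfAnalyzerException carrying the exit code on failure. A hedged sketch of how the entry point described in the removed unit-test comment would drive it; the CLParser type and its Parse() signature are assumptions here:

```cpp
#include <iostream>

#include "command_line_parser.h"
#include "perf_analyzer.h"
#include "perf_analyzer_exception.h"

int
main(int argc, char* argv[])
{
  try {
    // Hypothetical parsing call: CLParser comes from the removed
    // command_line_parser.h and the exact Parse() signature is assumed.
    triton::perfanalyzer::CLParser parser;
    pa::PAParamsPtr params{parser.Parse(argc, argv)};

    PerfAnalyzer analyzer(params);
    analyzer.Run();  // PrerunReport -> Profile -> WriteReport -> export -> Finalize
  }
  catch (const pa::PerfAnalyzerException& e) {
    std::cerr << e.what() << std::endl;
    return e.GetError();
  }
  return 0;
}
```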
- -#include "perf_utils.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "doctest.h" - -namespace triton { namespace perfanalyzer { - -cb::ProtocolType -ParseProtocol(const std::string& str) -{ - std::string protocol(str); - std::transform(protocol.begin(), protocol.end(), protocol.begin(), ::tolower); - if (protocol == "http") { - return cb::ProtocolType::HTTP; - } else if (protocol == "grpc") { - return cb::ProtocolType::GRPC; - } - return cb::ProtocolType::UNKNOWN; -} - -cb::Error -ConvertDTypeFromTFS(const std::string& tf_dtype, std::string* datatype) -{ - if (tf_dtype == "DT_HALF") { - *datatype = "FP16"; - } else if (tf_dtype == "DT_BFLOAT16") { - *datatype = "BF16"; - } else if (tf_dtype == "DT_FLOAT") { - *datatype = "FP32"; - } else if (tf_dtype == "DT_DOUBLE") { - *datatype = "FP64"; - } else if (tf_dtype == "DT_INT32") { - *datatype = "INT32"; - } else if (tf_dtype == "DT_INT16") { - *datatype = "INT16"; - } else if (tf_dtype == "DT_UINT16") { - *datatype = "UINT16"; - } else if (tf_dtype == "DT_INT8") { - *datatype = "INT8"; - } else if (tf_dtype == "DT_UINT8") { - *datatype = "UINT8"; - } else if (tf_dtype == "DT_STRING") { - *datatype = "BYTES"; - } else if (tf_dtype == "DT_INT64") { - *datatype = "INT64"; - } else if (tf_dtype == "DT_BOOL") { - *datatype = "BOOL"; - } else if (tf_dtype == "DT_UINT32") { - *datatype = "UINT32"; - } else if (tf_dtype == "DT_UINT64") { - *datatype = "UINT64"; - } else { - return cb::Error( - "unsupported datatype encountered " + tf_dtype, pa::GENERIC_ERROR); - } - - return cb::Error::Success; -} - -bool -IsDirectory(const std::string& path) -{ - struct stat s; - if (stat(path.c_str(), &s) == 0 && (s.st_mode & S_IFDIR)) { - return true; - } else { - return false; - } -} - -bool -IsFile(const std::string& complete_path) -{ - struct stat s; - if (stat(complete_path.c_str(), &s) == 0 && (s.st_mode & S_IFREG)) { - return true; - } else { - return false; - } -} - -int64_t -ByteSize(const std::vector& shape, const std::string& datatype) -{ - int one_element_size; - if ((datatype.compare("BOOL") == 0) || (datatype.compare("INT8") == 0) || - (datatype.compare("UINT8") == 0)) { - one_element_size = 1; - } else if ( - (datatype.compare("INT16") == 0) || (datatype.compare("UINT16") == 0) || - (datatype.compare("FP16") == 0) || (datatype.compare("BF16") == 0)) { - one_element_size = 2; - } else if ( - (datatype.compare("INT32") == 0) || (datatype.compare("UINT32") == 0) || - (datatype.compare("FP32") == 0)) { - one_element_size = 4; - } else if ( - (datatype.compare("INT64") == 0) || (datatype.compare("UINT64") == 0) || - (datatype.compare("FP64") == 0)) { - one_element_size = 8; - } else { - return -1; - } - - int64_t count = ElementCount(shape); - if (count < 0) { - return count; - } - - return (one_element_size * count); -} - -int64_t -ElementCount(const std::vector& shape) -{ - int64_t count = 1; - bool is_dynamic = false; - for (const auto dim : shape) { - if (dim == -1) { - is_dynamic = true; - } else { - count *= dim; - } - } - - if (is_dynamic) { - count = -1; - } - return count; -} - -void -SerializeStringTensor( - std::vector string_tensor, std::vector* serialized_data) -{ - std::string serialized = ""; - for (auto s : string_tensor) { - uint32_t len = s.size(); - serialized.append(reinterpret_cast(&len), sizeof(uint32_t)); - serialized.append(s); - } - - std::copy( - serialized.begin(), serialized.end(), - std::back_inserter(*serialized_data)); 
-} - -cb::Error -SerializeExplicitTensor( - const rapidjson::Value& tensor, const std::string& dt, - std::vector* decoded_data) -{ - if (dt.compare("BYTES") == 0) { - std::string serialized = ""; - for (const auto& value : tensor.GetArray()) { - if (!value.IsString()) { - return cb::Error( - "unable to find string data in json", pa::GENERIC_ERROR); - } - std::string element(value.GetString()); - uint32_t len = element.size(); - serialized.append(reinterpret_cast(&len), sizeof(uint32_t)); - serialized.append(element); - } - std::copy( - serialized.begin(), serialized.end(), - std::back_inserter(*decoded_data)); - } else if (dt.compare("JSON") == 0) { - std::string serialized = ""; - - auto values = tensor.GetArray(); - if (values.Size() != 1) { - return cb::Error( - "JSON format does not yet support multiple json objects in the " - "input"); - } - for (const auto& value : values) { - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - value.Accept(writer); - - std::string element = buffer.GetString(); - uint32_t len = element.size(); - serialized.append(element); - } - std::copy( - serialized.begin(), serialized.end(), - std::back_inserter(*decoded_data)); - } else { - for (const auto& value : tensor.GetArray()) { - if (dt.compare("BOOL") == 0) { - if (!value.IsBool()) { - return cb::Error( - "unable to find bool data in json", pa::GENERIC_ERROR); - } - bool element(value.GetBool()); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(bool)); - } else if (dt.compare("UINT8") == 0) { - if (!value.IsUint()) { - return cb::Error( - "unable to find uint8_t data in json", pa::GENERIC_ERROR); - } - uint8_t element(static_cast(value.GetUint())); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(uint8_t)); - } else if (dt.compare("INT8") == 0) { - if (!value.IsInt()) { - return cb::Error( - "unable to find int8_t data in json", pa::GENERIC_ERROR); - } - int8_t element(static_cast(value.GetInt())); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(int8_t)); - } else if (dt.compare("UINT16") == 0) { - if (!value.IsUint()) { - return cb::Error( - "unable to find uint16_t data in json", pa::GENERIC_ERROR); - } - uint16_t element(static_cast(value.GetUint())); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(uint16_t)); - } else if (dt.compare("INT16") == 0) { - if (!value.IsInt()) { - return cb::Error( - "unable to find int16_t data in json", pa::GENERIC_ERROR); - } - int16_t element(static_cast(value.GetInt())); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(int16_t)); - } else if (dt.compare("FP16") == 0) { - return cb::Error( - "Can not use explicit tensor description for fp16 datatype", - pa::GENERIC_ERROR); - } else if (dt.compare("BF16") == 0) { - return cb::Error( - "Can not use explicit tensor description for bf16 datatype", - pa::GENERIC_ERROR); - } else if (dt.compare("UINT32") == 0) { - if (!value.IsUint()) { - return cb::Error( - "unable to find uint32_t data in json", pa::GENERIC_ERROR); - } - uint32_t element(value.GetUint()); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(uint32_t)); - } else if (dt.compare("INT32") == 0) { - if (!value.IsInt()) { - return cb::Error( - "unable to find int32_t data in json", 
pa::GENERIC_ERROR); - } - int32_t element(value.GetInt()); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(int32_t)); - } else if (dt.compare("FP32") == 0) { - if (!value.IsDouble()) { - return cb::Error( - "unable to find float data in json", pa::GENERIC_ERROR); - } - float element(value.GetFloat()); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(float)); - } else if (dt.compare("UINT64") == 0) { - if (!value.IsUint64()) { - return cb::Error( - "unable to find uint64_t data in json", pa::GENERIC_ERROR); - } - uint64_t element(value.GetUint64()); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(uint64_t)); - } else if (dt.compare("INT64") == 0) { - if (!value.IsInt64()) { - return cb::Error( - "unable to find int64_t data in json", pa::GENERIC_ERROR); - } - int64_t element(value.GetInt64()); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(int64_t)); - } else if (dt.compare("FP64") == 0) { - if (!value.IsDouble()) { - return cb::Error( - "unable to find fp64 data in json", pa::GENERIC_ERROR); - } - double element(value.GetDouble()); - const char* src = reinterpret_cast(&element); - decoded_data->insert(decoded_data->end(), src, src + sizeof(double)); - } else { - return cb::Error("Unexpected type " + dt); - } - } - } - return cb::Error::Success; -} - -std::string -GetRandomString(const int string_length) -{ - std::mt19937_64 gen{std::random_device()()}; - std::uniform_int_distribution dist{0, character_set.length() - 1}; - std::string random_string; - std::generate_n(std::back_inserter(random_string), string_length, [&] { - return character_set[dist(gen)]; - }); - return random_string; -} - -std::string -ShapeVecToString(const std::vector shape_vec, bool skip_first) -{ - bool first = true; - std::string str("["); - for (const auto& value : shape_vec) { - if (skip_first) { - skip_first = false; - continue; - } - if (!first) { - str += ","; - } - str += std::to_string(value); - first = false; - } - - str += "]"; - return str; -} - -std::string -TensorToRegionName(std::string name) -{ - // Remove slashes from the name, if any. 
- name.erase( - std::remove_if( - name.begin(), name.end(), - [](const char& c) { return ((c == '/') || (c == '\\')); }), - name.end()); - return name; -} - -template <> -std::function -ScheduleDistribution(const double request_rate) -{ - std::exponential_distribution<> dist = - std::exponential_distribution<>(request_rate); - return [dist](std::mt19937& gen) mutable { - return std::chrono::duration_cast( - std::chrono::duration(dist(gen))); - }; -} - -template <> -std::function -ScheduleDistribution(const double request_rate) -{ - std::chrono::nanoseconds period = - std::chrono::duration_cast( - std::chrono::duration(1.0 / request_rate)); - return [period](std::mt19937& /*gen*/) { return period; }; -} - -cb::TensorFormat -ParseTensorFormat(const std::string& content_type_str) -{ - std::string content_type_str_lowercase{content_type_str}; - std::transform( - content_type_str.cbegin(), content_type_str.cend(), - content_type_str_lowercase.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (content_type_str_lowercase == "binary") { - return cb::TensorFormat::BINARY; - } else if (content_type_str_lowercase == "json") { - return cb::TensorFormat::JSON; - } else { - return cb::TensorFormat::UNKNOWN; - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/perf_utils.h b/src/c++/perf_analyzer/perf_utils.h deleted file mode 100644 index 6975d694b..000000000 --- a/src/c++/perf_analyzer/perf_utils.h +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
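SerializeStringTensor and the BYTES branch of SerializeExplicitTensor removed above both use Triton's length-prepended encoding: each element is written as a 4-byte uint32_t byte count followed by the raw characters. A self-contained sketch of that encoding, independent of the removed utilities:

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Encode a BYTES tensor the way the removed utilities do: for each element,
// a 4-byte length prefix (host byte order) followed by the raw characters.
std::vector<char>
SerializeBytesTensor(const std::vector<std::string>& elements)
{
  std::vector<char> serialized;
  for (const auto& s : elements) {
    const uint32_t len = static_cast<uint32_t>(s.size());
    const char* len_bytes = reinterpret_cast<const char*>(&len);
    serialized.insert(serialized.end(), len_bytes, len_bytes + sizeof(len));
    serialized.insert(serialized.end(), s.begin(), s.end());
  }
  return serialized;
}

// Example: {"ab", "c"} serializes to 02 00 00 00 'a' 'b' 01 00 00 00 'c'
// on a little-endian host.
```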
-#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "client_backend/client_backend.h" - -namespace pa = triton::perfanalyzer; -namespace cb = triton::perfanalyzer::clientbackend; - -namespace triton { namespace perfanalyzer { - -constexpr uint64_t NANOS_PER_SECOND = 1000000000; -constexpr uint64_t NANOS_PER_MILLIS = 1000000; -#define CHRONO_TO_NANOS(TS) \ - (std::chrono::duration_cast(TS.time_since_epoch()) \ - .count()) -#define CHRONO_TO_MILLIS(TS) (CHRONO_TO_NANOS(TS) / pa::NANOS_PER_MILLIS) - -//============================================================================== - -// Will use the characters specified here to construct random strings -std::string const character_set = - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890 .?!"; - -// A boolean flag to mark an interrupt and commencement of early exit -extern volatile bool early_exit; - -enum Distribution { POISSON = 0, CONSTANT = 1, CUSTOM = 2 }; -enum SearchMode { LINEAR = 0, BINARY = 1, NONE = 2 }; -enum SharedMemoryType { - SYSTEM_SHARED_MEMORY = 0, - CUDA_SHARED_MEMORY = 1, - NO_SHARED_MEMORY = 2 -}; - -constexpr uint64_t NO_LIMIT = 0; - -// Templated range class that tracks the start, stop, and step for a range. -// -template -class Range { - public: - Range(T start, T end, T step) : start(start), end(end), step(step) {} - - T start; - T end; - T step; -}; - -// Converts the datatype from tensorflow to perf analyzer space -// \param tf_dtype The data type string returned from the model metadata. -// \param datatype Returns the datatype in perf_analyzer space. -// \return error status. Returns Non-Ok if an error is encountered during -// read operation. -cb::Error ConvertDTypeFromTFS( - const std::string& tf_dtype, std::string* datatype); - -// Parse the communication protocol type -cb::ProtocolType ParseProtocol(const std::string& str); - -// To check whether the path points to a valid system directory -bool IsDirectory(const std::string& path); - -// To check whether the path points to a valid system file -bool IsFile(const std::string& complete_path); - -// Calculates the byte size tensor for given shape and datatype. -int64_t ByteSize( - const std::vector& shape, const std::string& datatype); - -// Get the number of elements in the tensor for given shape. -int64_t ElementCount(const std::vector& shape); - -// Serializes the string tensor to length prepended bytes. -void SerializeStringTensor( - std::vector string_tensor, std::vector* serialized_data); - -// Serializes an explicit tensor read from the data file to the -// raw bytes. -cb::Error SerializeExplicitTensor( - const rapidjson::Value& tensor, const std::string& dt, - std::vector* decoded_data); - -// Generates a random string of specified length using characters specified in -// character_set. -std::string GetRandomString(const int string_length); - -// Returns the shape string containing the values provided in the vector -std::string ShapeVecToString( - const std::vector shape_vec, bool skip_first = false); - -// Remove slashes from tensor name, if any -std::string TensorToRegionName(std::string name); - -// Returns the request schedule distribution generator with the specified -// request rate. 
-template -std::function ScheduleDistribution( - const double request_rate); - -// Parse the HTTP tensor format -cb::TensorFormat ParseTensorFormat(const std::string& tensor_format_str); - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/periodic_concurrency_manager.cc b/src/c++/perf_analyzer/periodic_concurrency_manager.cc deleted file mode 100644 index a8375ed65..000000000 --- a/src/c++/perf_analyzer/periodic_concurrency_manager.cc +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
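The two ScheduleDistribution specializations removed from perf_utils.cc drive request-rate mode: the Poisson variant draws exponentially distributed gaps whose mean is 1/request_rate, while the constant variant always returns that fixed period. A small sketch of the same idea without the perf_analyzer types:

```cpp
#include <chrono>
#include <functional>
#include <random>

using Gap = std::chrono::nanoseconds;

// Poisson arrivals: inter-request gaps are exponential with mean 1/request_rate.
std::function<Gap(std::mt19937&)>
PoissonSchedule(double request_rate)
{
  std::exponential_distribution<> dist(request_rate);
  return [dist](std::mt19937& gen) mutable {
    return std::chrono::duration_cast<Gap>(
        std::chrono::duration<double>(dist(gen)));
  };
}

// Constant arrivals: every gap is exactly 1/request_rate seconds.
std::function<Gap(std::mt19937&)>
ConstantSchedule(double request_rate)
{
  const Gap period = std::chrono::duration_cast<Gap>(
      std::chrono::duration<double>(1.0 / request_rate));
  return [period](std::mt19937&) { return period; };
}

// At 1000 requests/sec the constant schedule yields 1 ms gaps, while the
// Poisson schedule averages 1 ms but produces bursts and lulls.
```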
- -#include "periodic_concurrency_manager.h" - -namespace triton { namespace perfanalyzer { - -std::vector -PeriodicConcurrencyManager::RunExperiment() -{ - AddConcurrentRequests(concurrency_range_.start); - WaitForRequestsToFinish(); - return GetRequestRecords(); -} - -std::shared_ptr -PeriodicConcurrencyManager::MakeWorker( - std::shared_ptr thread_stat, - std::shared_ptr thread_config) -{ - uint32_t id = workers_.size(); - auto worker = std::make_shared( - id, thread_stat, thread_config, parser_, data_loader_, factory_, - on_sequence_model_, async_, max_concurrency_, using_json_data_, - streaming_, batch_size_, wake_signal_, wake_mutex_, active_threads_, - execute_, infer_data_manager_, sequence_manager_, request_period_, - period_completed_callback_, request_completed_callback_); - return worker; -}; - -void -PeriodicConcurrencyManager::AddConcurrentRequests( - uint64_t num_concurrent_requests) -{ - for (size_t i = 0; i < num_concurrent_requests; i++) { - AddConcurrentRequest(i); - } - num_incomplete_periods_ = num_concurrent_requests; -} - -void -PeriodicConcurrencyManager::AddConcurrentRequest(size_t seq_stat_index_offset) -{ - threads_stat_.emplace_back(std::make_shared()); - threads_config_.emplace_back( - std::make_shared(threads_config_.size())); - threads_config_.back()->concurrency_ = 1; - threads_config_.back()->seq_stat_index_offset_ = seq_stat_index_offset; - workers_.emplace_back( - MakeWorker(threads_stat_.back(), threads_config_.back())); - threads_.emplace_back(&IWorker::Infer, workers_.back()); - active_threads_++; -} - -void -PeriodicConcurrencyManager::PeriodCompletedCallback() -{ - std::lock_guard lock(period_completed_callback_mutex_); - num_incomplete_periods_--; - if (num_incomplete_periods_ == 0) { - steps_completed_++; - uint64_t num_requests_sent{steps_completed_ * concurrency_range_.step}; - if (num_requests_sent < concurrency_range_.end) { - AddConcurrentRequests(concurrency_range_.step); - } - } -} - -void -PeriodicConcurrencyManager::RequestCompletedCallback() -{ - std::lock_guard lock(request_completed_callback_mutex_); - num_completed_requests_++; - if (num_completed_requests_ == concurrency_range_.end) { - all_requests_completed_promise_.set_value(true); - } -} - -void -PeriodicConcurrencyManager::WaitForRequestsToFinish() -{ - std::future all_requests_completed_future{ - all_requests_completed_promise_.get_future()}; - all_requests_completed_future.get(); -} - -std::vector -PeriodicConcurrencyManager::GetRequestRecords() -{ - std::vector request_records{}; - for (const auto& thread_stat : threads_stat_) { - request_records.insert( - request_records.end(), thread_stat->request_records_.cbegin(), - thread_stat->request_records_.cend()); - } - return request_records; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/periodic_concurrency_manager.h b/src/c++/perf_analyzer/periodic_concurrency_manager.h deleted file mode 100644 index 40a0634b4..000000000 --- a/src/c++/perf_analyzer/periodic_concurrency_manager.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "concurrency_manager.h" -#include "periodic_concurrency_worker.h" - -namespace triton { namespace perfanalyzer { - -/// @brief Concurrency manager for periodically increasing concurrency by a step -/// amount based on the number of responses received (request period) by the -/// latest N (step or start concurrency for first-issued concurrent requests) -/// concurrent requests/workers. -class PeriodicConcurrencyManager : public ConcurrencyManager { - public: - PeriodicConcurrencyManager( - const bool async, const bool streaming, const int32_t batch_size, - const size_t max_threads, const size_t max_concurrency, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const Range concurrency_range, const uint64_t request_period, - const std::unordered_map& - request_parameters) - : ConcurrencyManager( - async, streaming, batch_size, max_threads, max_concurrency, - shared_memory_type, output_shm_size, parser, factory, - request_parameters), - concurrency_range_(concurrency_range), request_period_(request_period) - { - } - - std::vector RunExperiment(); - - private: - std::shared_ptr MakeWorker( - std::shared_ptr thread_stat, - std::shared_ptr thread_config) override; - - void AddConcurrentRequests(uint64_t num_concurrent_requests); - - void AddConcurrentRequest(size_t seq_stat_index_offset); - - void PeriodCompletedCallback(); - - void RequestCompletedCallback(); - - void WaitForRequestsToFinish(); - - std::vector GetRequestRecords(); - - Range concurrency_range_{1, 1, 1}; - uint64_t request_period_{0}; - uint64_t steps_completed_{0}; - uint64_t num_incomplete_periods_{0}; - uint64_t num_completed_requests_{0}; - std::mutex period_completed_callback_mutex_{}; - std::mutex request_completed_callback_mutex_{}; - std::promise all_requests_completed_promise_{}; - std::function period_completed_callback_{ - std::bind(&PeriodicConcurrencyManager::PeriodCompletedCallback, this)}; - std::function request_completed_callback_{ - std::bind(&PeriodicConcurrencyManager::RequestCompletedCallback, this)}; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/periodic_concurrency_worker.cc b/src/c++/perf_analyzer/periodic_concurrency_worker.cc deleted file mode 100644 index 
9af3a9d87..000000000 --- a/src/c++/perf_analyzer/periodic_concurrency_worker.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "periodic_concurrency_worker.h" - -namespace triton { namespace perfanalyzer { - -void -PeriodicConcurrencyWorker::Infer() -{ - CreateCtxIdTracker(); - ReserveContexts(); - RunInference(); -} - -std::shared_ptr -PeriodicConcurrencyWorker::CreateInferContext() -{ - std::shared_ptr infer_context{std::make_shared( - id_, ctxs_.size(), async_, streaming_, on_sequence_model_, - using_json_data_, batch_size_, thread_stat_, data_loader_, parser_, - factory_, execute_, infer_data_manager_, sequence_manager_)}; - infer_context->RegisterWorkerCallback(worker_callback_); - return infer_context; -} - -void -PeriodicConcurrencyWorker::WorkerCallback(uint32_t infer_context_id) -{ - if (ctxs_.at(infer_context_id)->GetNumResponsesForCurrentRequest() == - request_period_) { - period_completed_callback_(); - } - if (ctxs_.at(infer_context_id)->HasReceivedFinalResponse()) { - bool has_not_completed_period{ - ctxs_.at(infer_context_id)->GetNumResponsesForCurrentRequest() < - request_period_}; - if (has_not_completed_period) { - throw std::runtime_error( - "Request received final response before request period was reached. " - "Request period must be at most the total number of responses " - "received by any request."); - } - request_completed_callback_(); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/periodic_concurrency_worker.h b/src/c++/perf_analyzer/periodic_concurrency_worker.h deleted file mode 100644 index 7242219b9..000000000 --- a/src/c++/perf_analyzer/periodic_concurrency_worker.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
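The worker removed above issues a single request and counts its responses: reaching `request_period_` triggers the period callback, and a final response arriving before that point is treated as a usage error. A reduced sketch of that decision logic, using plain integers and callables instead of the InferContext API (names are illustrative):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <stdexcept>

// Hypothetical reduction of the deleted WorkerCallback: notify once per
// period, and validate the period against the final response count.
void OnResponse(
    uint64_t responses_so_far, bool is_final, uint64_t request_period,
    const std::function<void()>& period_completed,
    const std::function<void()>& request_completed)
{
  if (responses_so_far == request_period) {
    period_completed();
  }
  if (is_final) {
    if (responses_so_far < request_period) {
      throw std::runtime_error(
          "Request received final response before request period was "
          "reached.");
    }
    request_completed();
  }
}

int main() {
  const uint64_t period{3};
  for (uint64_t n = 1; n <= 4; n++) {
    OnResponse(
        n, /*is_final=*/n == 4, period,
        [] { std::cout << "period completed\n"; },
        [] { std::cout << "request completed\n"; });
  }
  return 0;
}
```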
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "concurrency_worker.h" - -namespace triton { namespace perfanalyzer { - -/// @brief Worker class for periodic concurrency mode. Issues one request only -/// and waits for all responses to come in. Notifies manager when N responses -/// (request period) have been received. Notifies manager when final response -/// has been received. 
-class PeriodicConcurrencyWorker : public ConcurrencyWorker { - public: - PeriodicConcurrencyWorker( - uint32_t id, std::shared_ptr thread_stat, - std::shared_ptr thread_config, - const std::shared_ptr parser, - std::shared_ptr data_loader, - const std::shared_ptr factory, - const bool on_sequence_model, const bool async, - const size_t max_concurrency, const bool using_json_data, - const bool streaming, const int32_t batch_size, - std::condition_variable& wake_signal, std::mutex& wake_mutex, - size_t& active_threads, bool& execute, - const std::shared_ptr& infer_data_manager, - std::shared_ptr sequence_manager, - uint64_t request_period, std::function period_completed_callback, - std::function request_completed_callback) - : ConcurrencyWorker( - id, thread_stat, thread_config, parser, data_loader, factory, - on_sequence_model, async, max_concurrency, using_json_data, - streaming, batch_size, wake_signal, wake_mutex, active_threads, - execute, infer_data_manager, sequence_manager), - request_period_(request_period), - period_completed_callback_(period_completed_callback), - request_completed_callback_(request_completed_callback) - { - } - - void Infer() override; - - std::shared_ptr CreateInferContext() override; - - void WorkerCallback(uint32_t infer_context_id); - - private: - uint64_t request_period_{0}; - std::function period_completed_callback_{nullptr}; - std::function request_completed_callback_{nullptr}; - std::function worker_callback_{std::bind( - &PeriodicConcurrencyWorker::WorkerCallback, this, std::placeholders::_1)}; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/profile_data_collector.cc b/src/c++/perf_analyzer/profile_data_collector.cc deleted file mode 100644 index 8cca26a70..000000000 --- a/src/c++/perf_analyzer/profile_data_collector.cc +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
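The deleted header wires per-response notifications through `worker_callback_`, a member function packaged with `std::bind` so it can be handed to each inference context as a plain `std::function`. A small self-contained illustration of that pattern, with hypothetical `Worker`/`OnResponse` names:

```cpp
#include <cstdint>
#include <functional>
#include <iostream>

// Illustrative only: a member function packaged as a std::function callback,
// in the same style as the deleted worker's worker_callback_ member.
class Worker {
 public:
  void OnResponse(uint32_t ctx_id) {
    std::cout << "response for context " << ctx_id << "\n";
  }

  std::function<void(uint32_t)> callback_{
      std::bind(&Worker::OnResponse, this, std::placeholders::_1)};
};

int main() {
  Worker w;
  w.callback_(7);  // prints: response for context 7
  return 0;
}
```

A capturing lambda, `[this](uint32_t id) { OnResponse(id); }`, is an equivalent and often clearer alternative to `std::bind` in this role.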
- -#include "profile_data_collector.h" - -#include - -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -cb::Error -ProfileDataCollector::Create(std::shared_ptr* collector) -{ - std::shared_ptr local_collector{ - new ProfileDataCollector()}; - *collector = std::move(local_collector); - return cb::Error::Success; -} - -void -ProfileDataCollector::AddWindow( - InferenceLoadMode& id, uint64_t window_start_ns, uint64_t window_end_ns) -{ - auto it = FindExperiment(id); - - if (it == experiments_.end()) { - Experiment new_experiment{}; - new_experiment.mode = id; - new_experiment.window_boundaries.push_back(window_start_ns); - new_experiment.window_boundaries.push_back(window_end_ns); - - experiments_.push_back(new_experiment); - } else { - // Window timestamps are always increasing so it is safe to check only the - // last element - if (it->window_boundaries.back() != window_start_ns) { - it->window_boundaries.push_back(window_start_ns); - } - it->window_boundaries.push_back(window_end_ns); - } -} - -void -ProfileDataCollector::AddData( - InferenceLoadMode& id, std::vector&& request_records) -{ - auto it = FindExperiment(id); - - if (it == experiments_.end()) { - Experiment new_experiment{}; - new_experiment.mode = id; - new_experiment.requests = std::move(request_records); - experiments_.push_back(new_experiment); - } else { - it->requests.insert( - it->requests.end(), std::make_move_iterator(request_records.begin()), - std::make_move_iterator(request_records.end())); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/profile_data_collector.h b/src/c++/perf_analyzer/profile_data_collector.h deleted file mode 100644 index 3a726bbf4..000000000 --- a/src/c++/perf_analyzer/profile_data_collector.h +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#pragma once - -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "perf_utils.h" -#include "request_record.h" - -namespace triton { namespace perfanalyzer { - -/// Data structure to hold which inference load mode was used for an experiment. -/// Only one data member will be nonzero, indicating the inference load mode for -/// a particular experiment. -struct InferenceLoadMode { - uint32_t concurrency; - double request_rate; - - InferenceLoadMode() - { - concurrency = 0; - request_rate = 0.0; - } - - InferenceLoadMode(uint64_t c, double rr) - { - concurrency = c; - request_rate = rr; - } - - bool operator==(const InferenceLoadMode& rhs) const - { - return (concurrency == rhs.concurrency) && - (request_rate == rhs.request_rate); - } -}; - -/// Data structure to hold profile export data for an experiment (e.g. -/// concurrency 4 or request rate 50) -struct Experiment { - InferenceLoadMode mode; - std::vector requests; - std::vector window_boundaries; -}; - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockProfileDataCollector; -#endif - -/// Data structure and methods for storing profile export data. -class ProfileDataCollector { - public: - static cb::Error Create(std::shared_ptr* collector); - ~ProfileDataCollector() = default; - - - /// Add a measurement window to the collector - /// @param id Identifier for the experiment - /// @param window_start_ns The window start timestamp in nanoseconds. - /// @param window_end_ns The window end timestamp in nanoseconds. - void AddWindow( - InferenceLoadMode& id, uint64_t window_start_ns, uint64_t window_end_ns); - - /// Add request records to an experiment - /// @param id Identifier for the experiment - /// @param request_records The request information for the current experiment. - void AddData( - InferenceLoadMode& id, std::vector&& request_records); - - /// Get the experiment data for the profile - /// @return Experiment data - std::vector& GetData() { return experiments_; } - - std::string& GetVersion() { return version_; } - - private: - ProfileDataCollector() = default; - - virtual std::vector::iterator FindExperiment( - InferenceLoadMode& id) - { - return std::find_if( - experiments_.begin(), experiments_.end(), - [&id](const Experiment& e) { return e.mode == id; }); - }; - - std::vector experiments_{}; - std::string version_{VERSION}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockProfileDataCollector; -#endif -}; -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/profile_data_exporter.cc b/src/c++/perf_analyzer/profile_data_exporter.cc deleted file mode 100644 index ea79d6856..000000000 --- a/src/c++/perf_analyzer/profile_data_exporter.cc +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
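In the collector removed above, experiments are keyed by `InferenceLoadMode` and looked up with a linear `std::find_if` over the stored vector, and `AddWindow` avoids storing a shared boundary twice because window timestamps only ever increase. A reduced sketch of that bookkeeping with simplified stand-in types (not the Perf Analyzer ones):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-ins for the deleted InferenceLoadMode/Experiment types.
struct LoadMode {
  uint32_t concurrency{0};
  double request_rate{0.0};
  bool operator==(const LoadMode& rhs) const {
    return concurrency == rhs.concurrency && request_rate == rhs.request_rate;
  }
};

struct Experiment {
  LoadMode mode;
  std::vector<uint64_t> window_boundaries;  // ns timestamps
};

int main() {
  std::vector<Experiment> experiments;

  auto add_window = [&](LoadMode id, uint64_t start_ns, uint64_t end_ns) {
    auto it = std::find_if(
        experiments.begin(), experiments.end(),
        [&](const Experiment& e) { return e.mode == id; });
    if (it == experiments.end()) {
      experiments.push_back({id, {start_ns, end_ns}});
    } else {
      // Windows arrive in increasing order, so only the last stored boundary
      // can duplicate the new start timestamp.
      if (it->window_boundaries.back() != start_ns) {
        it->window_boundaries.push_back(start_ns);
      }
      it->window_boundaries.push_back(end_ns);
    }
  };

  add_window({4, 0.0}, 0, 1000);
  add_window({4, 0.0}, 1000, 2000);  // shared boundary stored once
  std::cout << experiments[0].window_boundaries.size() << "\n";  // prints 3
  return 0;
}
```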
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#include "profile_data_exporter.h" - -#include -#include -#include - -#include "client_backend/client_backend.h" - -namespace triton { namespace perfanalyzer { - -cb::Error -ProfileDataExporter::Create(std::shared_ptr* exporter) -{ - std::shared_ptr local_exporter{ - new ProfileDataExporter()}; - *exporter = std::move(local_exporter); - return cb::Error::Success; -} - -void -ProfileDataExporter::Export( - const std::vector& raw_experiments, std::string& raw_version, - std::string& file_path, cb::BackendKind& service_kind, - std::string& endpoint) -{ - ConvertToJson(raw_experiments, raw_version, service_kind, endpoint); - OutputToFile(file_path); -} - -void -ProfileDataExporter::ConvertToJson( - const std::vector& raw_experiments, std::string& raw_version, - cb::BackendKind& service_kind, std::string& endpoint) -{ - ClearDocument(); - rapidjson::Value experiments(rapidjson::kArrayType); - - for (const auto& raw_experiment : raw_experiments) { - rapidjson::Value entry(rapidjson::kObjectType); - rapidjson::Value experiment(rapidjson::kObjectType); - rapidjson::Value requests(rapidjson::kArrayType); - rapidjson::Value window_boundaries(rapidjson::kArrayType); - - AddExperiment(entry, experiment, raw_experiment); - AddRequests(entry, requests, raw_experiment); - AddWindowBoundaries(entry, window_boundaries, raw_experiment); - - experiments.PushBack(entry, document_.GetAllocator()); - } - - document_.AddMember("experiments", experiments, document_.GetAllocator()); - AddVersion(raw_version); - AddServiceKind(service_kind); - AddEndpoint(endpoint); -} - -void -ProfileDataExporter::ClearDocument() -{ - rapidjson::Document d{}; - document_.Swap(d); - document_.SetObject(); -} - -void -ProfileDataExporter::AddExperiment( - rapidjson::Value& entry, rapidjson::Value& experiment, - const Experiment& raw_experiment) -{ - rapidjson::Value mode; - rapidjson::Value value; - if (raw_experiment.mode.concurrency != 0) { - mode = rapidjson::StringRef("concurrency"); - value.SetUint64(raw_experiment.mode.concurrency); - } else { - mode = rapidjson::StringRef("request_rate"); - value.SetDouble(raw_experiment.mode.request_rate); - } - experiment.AddMember("mode", mode, document_.GetAllocator()); - experiment.AddMember("value", value, document_.GetAllocator()); - entry.AddMember("experiment", experiment, document_.GetAllocator()); -} - -void -ProfileDataExporter::AddRequests( - rapidjson::Value& entry, rapidjson::Value& requests, - const Experiment& raw_experiment) -{ - for (auto& raw_request : raw_experiment.requests) { - rapidjson::Value request(rapidjson::kObjectType); - rapidjson::Value timestamp; - - timestamp.SetUint64(raw_request.start_time_.time_since_epoch().count()); - request.AddMember("timestamp", timestamp, 
document_.GetAllocator()); - - if (raw_request.sequence_id_ != 0) { - rapidjson::Value sequence_id; - sequence_id.SetUint64(raw_request.sequence_id_); - request.AddMember("sequence_id", sequence_id, document_.GetAllocator()); - } - - rapidjson::Value request_inputs(rapidjson::kObjectType); - AddRequestInputs(request_inputs, raw_request.request_inputs_); - request.AddMember( - "request_inputs", request_inputs, document_.GetAllocator()); - - rapidjson::Value response_timestamps(rapidjson::kArrayType); - AddResponseTimestamps( - response_timestamps, raw_request.response_timestamps_); - request.AddMember( - "response_timestamps", response_timestamps, document_.GetAllocator()); - - rapidjson::Value response_outputs(rapidjson::kArrayType); - AddResponseOutputs(response_outputs, raw_request.response_outputs_); - request.AddMember( - "response_outputs", response_outputs, document_.GetAllocator()); - - requests.PushBack(request, document_.GetAllocator()); - } - entry.AddMember("requests", requests, document_.GetAllocator()); -} - -void -ProfileDataExporter::AddResponseTimestamps( - rapidjson::Value& timestamps_json, - const std::vector>& - timestamps) -{ - for (auto& timestamp : timestamps) { - rapidjson::Value timestamp_json; - timestamp_json.SetUint64(timestamp.time_since_epoch().count()); - timestamps_json.PushBack(timestamp_json, document_.GetAllocator()); - } -} - -void -ProfileDataExporter::AddRequestInputs( - rapidjson::Value& request_inputs_json, - const std::vector& request_inputs) -{ - for (const auto& request_input : request_inputs) { - for (const auto& input : request_input) { - const auto& name{input.first}; - const auto& buf{input.second.data_.get()}; - const auto& byte_size{input.second.size_}; - const auto& data_type{input.second.data_type_}; - rapidjson::Value name_json(name.c_str(), document_.GetAllocator()); - rapidjson::Value input_json{}; - // TMA-1777: support other data types - if (buf != nullptr) { - if (data_type == "BYTES" || data_type == "JSON") { - input_json.SetString( - reinterpret_cast(buf), byte_size, - document_.GetAllocator()); - } else if (data_type == "INT32") { - auto* val = reinterpret_cast(buf); - input_json.SetInt(*val); - } else if (data_type == "BOOL") { - bool is_true = (*buf > 0); - input_json.SetBool(is_true); - } else { - std::cerr << "WARNING: data type '" + data_type + - "' is not supported with JSON." 
- << std::endl; - } - } else { - input_json.SetString("", 0, document_.GetAllocator()); - } - request_inputs_json.AddMember( - name_json, input_json, document_.GetAllocator()); - } - } -} - -void -ProfileDataExporter::AddResponseOutputs( - rapidjson::Value& outputs_json, - const std::vector& response_outputs) -{ - for (const auto& response_output : response_outputs) { - rapidjson::Value response_output_json(rapidjson::kObjectType); - for (const auto& output : response_output) { - const auto& name{output.first}; - const auto& buf{output.second.data_.get()}; - const auto& byte_size{output.second.size_}; - rapidjson::Value name_json(name.c_str(), document_.GetAllocator()); - rapidjson::Value output_json{}; - // TMA-1777: support other data types - if (buf != nullptr) { - output_json.SetString( - reinterpret_cast(buf), byte_size, - document_.GetAllocator()); - } else { - output_json.SetString("", 0, document_.GetAllocator()); - } - response_output_json.AddMember( - name_json, output_json, document_.GetAllocator()); - } - outputs_json.PushBack(response_output_json, document_.GetAllocator()); - } -} - -void -ProfileDataExporter::AddWindowBoundaries( - rapidjson::Value& entry, rapidjson::Value& window_boundaries, - const Experiment& raw_experiment) -{ - for (auto& window : raw_experiment.window_boundaries) { - rapidjson::Value w; - w.SetUint64(window); - window_boundaries.PushBack(w, document_.GetAllocator()); - } - entry.AddMember( - "window_boundaries", window_boundaries, document_.GetAllocator()); -} - -void -ProfileDataExporter::AddVersion(std::string& raw_version) -{ - rapidjson::Value version; - version = rapidjson::StringRef(raw_version.c_str()); - document_.AddMember("version", version, document_.GetAllocator()); -} - -void -ProfileDataExporter::AddServiceKind(cb::BackendKind& kind) -{ - std::string raw_service_kind{""}; - if (kind == cb::BackendKind::TRITON) { - raw_service_kind = "triton"; - } else if (kind == cb::BackendKind::TENSORFLOW_SERVING) { - raw_service_kind = "tfserving"; - } else if (kind == cb::BackendKind::TORCHSERVE) { - raw_service_kind = "torchserve"; - } else if (kind == cb::BackendKind::TRITON_C_API) { - raw_service_kind = "triton_c_api"; - } else if (kind == cb::BackendKind::OPENAI) { - raw_service_kind = "openai"; - } else { - std::cerr << "Unknown service kind detected. The 'service_kind' will not " - "be specified." 
- << std::endl; - } - - rapidjson::Value service_kind; - service_kind.SetString(raw_service_kind.c_str(), document_.GetAllocator()); - document_.AddMember("service_kind", service_kind, document_.GetAllocator()); -} - -void -ProfileDataExporter::AddEndpoint(std::string& raw_endpoint) -{ - rapidjson::Value endpoint; - endpoint = rapidjson::StringRef(raw_endpoint.c_str()); - document_.AddMember("endpoint", endpoint, document_.GetAllocator()); -} - -void -ProfileDataExporter::OutputToFile(std::string& file_path) -{ - FILE* fp = fopen(file_path.c_str(), "w"); - if (fp == nullptr) { - throw PerfAnalyzerException( - "failed to open file for outputting raw profile data", GENERIC_ERROR); - } - char writeBuffer[65536]; - rapidjson::FileWriteStream os(fp, writeBuffer, sizeof(writeBuffer)); - - rapidjson::Writer writer(os); - document_.Accept(writer); - - fclose(fp); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/profile_data_exporter.h b/src/c++/perf_analyzer/profile_data_exporter.h deleted file mode 100644 index 820148d7a..000000000 --- a/src/c++/perf_analyzer/profile_data_exporter.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include - -#include "client_backend/client_backend.h" -#include "profile_data_collector.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockProfileDataExporter; -#endif - -/// Exports profile data. -class ProfileDataExporter { - public: - static cb::Error Create(std::shared_ptr* exporter); - ~ProfileDataExporter() = default; - - /// Export profile data to json file - /// @param raw_experiments All of the raw data for the experiments run by perf - /// analyzer - /// @param raw_version String containing the version number for the json - /// output - /// @param file_path File path to export profile data to. - /// @param service_kind Service that Perf Analyzer generates load for. - /// @param endpoint Endpoint to send the requests. 
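The exporter deleted above builds one rapidjson `Document` per export and streams it through a `FileWriteStream`. A minimal sketch of constructing and serializing a document with the same overall shape, an `experiments` array plus `version`, written to stdout instead of a file; the field values below are placeholders:

```cpp
#include <cstdio>

#include <rapidjson/document.h>
#include <rapidjson/filewritestream.h>
#include <rapidjson/writer.h>

// Minimal sketch of building and serializing a document shaped like the
// deleted exporter's output: {"experiments":[{"experiment":{...}}],"version":...}
int main() {
  rapidjson::Document document;
  document.SetObject();
  auto& alloc = document.GetAllocator();

  rapidjson::Value experiment(rapidjson::kObjectType);
  experiment.AddMember("mode", "concurrency", alloc);
  experiment.AddMember("value", 4u, alloc);

  rapidjson::Value entry(rapidjson::kObjectType);
  entry.AddMember("experiment", experiment, alloc);

  rapidjson::Value experiments(rapidjson::kArrayType);
  experiments.PushBack(entry, alloc);

  document.AddMember("experiments", experiments, alloc);
  document.AddMember("version", "0.0.0", alloc);

  // Stream the document the same way the exporter streams to its output file.
  char buffer[4096];
  rapidjson::FileWriteStream os(stdout, buffer, sizeof(buffer));
  rapidjson::Writer<rapidjson::FileWriteStream> writer(os);
  document.Accept(writer);
  std::putchar('\n');
  return 0;
}
```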
- void Export( - const std::vector& raw_experiments, std::string& raw_version, - std::string& file_path, cb::BackendKind& service_kind, - std::string& endpoint); - - private: - ProfileDataExporter() = default; - /// Convert the raw data collected to json output - /// @param raw_experiments All of the raw data for the experiments run by perf - /// analyzer - /// @param raw_version String containing the version number for the json - /// output - /// @param service_kind Service that Perf Analyzer generates load for. - /// @param endpoint Endpoint to send the requests. - virtual void ConvertToJson( - const std::vector& raw_experiments, std::string& raw_version, - cb::BackendKind& service_kind, std::string& endpoint); - virtual void OutputToFile(std::string& file_path); - virtual void AddExperiment( - rapidjson::Value& entry, rapidjson::Value& experiment, - const Experiment& raw_experiment); - void AddRequests( - rapidjson::Value& entry, rapidjson::Value& requests, - const Experiment& raw_experiment); - void AddRequestInputs( - rapidjson::Value& inputs_json, - const std::vector& inputs); - void AddResponseTimestamps( - rapidjson::Value& timestamps_json, - const std::vector>& - timestamps); - void AddResponseOutputs( - rapidjson::Value& outputs_json, - const std::vector& outputs); - void AddWindowBoundaries( - rapidjson::Value& entry, rapidjson::Value& window_boundaries, - const Experiment& raw_experiment); - void AddVersion(std::string& raw_version); - void AddServiceKind(cb::BackendKind& service_kind); - void AddEndpoint(std::string& endpoint); - void ClearDocument(); - - rapidjson::Document document_{}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockProfileDataExporter; -#endif -}; -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/rand_ctx_id_tracker.h b/src/c++/perf_analyzer/rand_ctx_id_tracker.h deleted file mode 100644 index e850909a1..000000000 --- a/src/c++/perf_analyzer/rand_ctx_id_tracker.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "ictx_id_tracker.h" - -namespace triton { namespace perfanalyzer { - -// Context ID tracker that is always available and returns random Context IDs -// -class RandCtxIdTracker : public ICtxIdTracker { - public: - RandCtxIdTracker() = default; - - void Reset(size_t count) override - { - distribution_ = std::uniform_int_distribution(0, count - 1); - } - - void Restore(size_t id) override{}; - - size_t Get() override { return distribution_(rng_generator_); }; - - bool IsAvailable() override { return true; }; - - private: - std::uniform_int_distribution distribution_; - std::default_random_engine rng_generator_{}; - - size_t max = 0; -}; - -}}; // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/rate_schedule.h b/src/c++/perf_analyzer/rate_schedule.h deleted file mode 100644 index d45ecd31b..000000000 --- a/src/c++/perf_analyzer/rate_schedule.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
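The removed `RandCtxIdTracker` is essentially a `std::uniform_int_distribution` reset to `[0, count - 1]` and sampled from a default engine on every `Get()`. A stripped-down, runnable version of the same idea:

```cpp
#include <cstddef>
#include <iostream>
#include <random>

// Reduced sketch of the deleted RandCtxIdTracker: always available, returns
// uniformly random context IDs in [0, count).
class RandIdTracker {
 public:
  void Reset(size_t count) {
    distribution_ = std::uniform_int_distribution<size_t>(0, count - 1);
  }
  size_t Get() { return distribution_(rng_); }

 private:
  std::uniform_int_distribution<size_t> distribution_;
  std::default_random_engine rng_{};
};

int main() {
  RandIdTracker tracker;
  tracker.Reset(4);
  for (int i = 0; i < 5; i++) {
    std::cout << tracker.Get() << " ";  // e.g. "3 0 2 2 1"
  }
  std::cout << "\n";
  return 0;
}
```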
- -#pragma once - -#include -#include -#include - -namespace triton { namespace perfanalyzer { - -using NanoIntervals = std::vector; - -/// Defines a schedule, where the consumer should -/// loop through the provided intervals, and then every time it loops back to -/// the start add an additional amount equal to the duration -/// -struct RateSchedule { - NanoIntervals intervals; - std::chrono::nanoseconds duration; - - /// Returns the next timestamp in the schedule - /// - std::chrono::nanoseconds Next() - { - auto next = intervals[index_] + duration * rounds_; - - index_++; - if (index_ >= intervals.size()) { - rounds_++; - index_ = 0; - } - return next; - } - - private: - size_t rounds_ = 0; - size_t index_ = 0; -}; - -using RateSchedulePtr_t = std::shared_ptr; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/report_writer.cc b/src/c++/perf_analyzer/report_writer.cc deleted file mode 100644 index 3d9cac6a2..000000000 --- a/src/c++/perf_analyzer/report_writer.cc +++ /dev/null @@ -1,391 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
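The removed `RateSchedule::Next` walks the interval list and, each time it wraps, shifts every value forward by one full `duration`, so a short list describes an unbounded timetable. A small worked example using the same struct, re-stated here for illustration:

```cpp
#include <chrono>
#include <cstddef>
#include <iostream>
#include <vector>

// Re-statement of the deleted RateSchedule for illustration.
struct RateSchedule {
  std::vector<std::chrono::nanoseconds> intervals;
  std::chrono::nanoseconds duration;

  std::chrono::nanoseconds Next() {
    auto next = intervals[index_] + duration * rounds_;
    index_++;
    if (index_ >= intervals.size()) {
      rounds_++;
      index_ = 0;
    }
    return next;
  }

 private:
  size_t rounds_ = 0;
  size_t index_ = 0;
};

int main() {
  using std::chrono::nanoseconds;
  // Two sends per 100 ns window, at offsets 25 ns and 75 ns.
  RateSchedule schedule;
  schedule.intervals = {nanoseconds(25), nanoseconds(75)};
  schedule.duration = nanoseconds(100);
  for (int i = 0; i < 5; i++) {
    std::cout << schedule.Next().count() << " ";  // 25 75 125 175 225
  }
  std::cout << "\n";
  return 0;
}
```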
- -#include "report_writer.h" - -#include -#include - -#include "constants.h" -#include "perf_analyzer_exception.h" - -namespace triton { namespace perfanalyzer { - - -cb::Error -ReportWriter::Create( - const std::string& filename, const bool target_concurrency, - const std::vector& summary, const bool verbose_csv, - const bool include_server_stats, const int32_t percentile, - const std::shared_ptr& parser, - std::unique_ptr* writer, const bool should_output_metrics) -{ - std::unique_ptr local_writer(new ReportWriter( - filename, target_concurrency, summary, verbose_csv, include_server_stats, - percentile, parser, should_output_metrics)); - - *writer = std::move(local_writer); - - return cb::Error::Success; -} - -ReportWriter::ReportWriter( - const std::string& filename, const bool target_concurrency, - const std::vector& summary, const bool verbose_csv, - const bool include_server_stats, const int32_t percentile, - const std::shared_ptr& parser, - const bool should_output_metrics) - : filename_(filename), target_concurrency_(target_concurrency), - summary_(summary), verbose_csv_(verbose_csv), - include_server_stats_(include_server_stats), percentile_(percentile), - parser_(parser), should_output_metrics_(should_output_metrics) -{ -} - - -void -ReportWriter::GenerateReport() -{ - if (!filename_.empty()) { - std::ofstream ofs(filename_, std::ofstream::out); - if (target_concurrency_) { - ofs << "Concurrency,"; - } else { - ofs << "Request Rate,"; - } - ofs << "Inferences/Second,"; - if (parser_->IsDecoupled()) { - ofs << "Response Throughput,"; - } - ofs << "Client Send,"; - if (include_server_stats_) { - ofs << "Network+Server Send/Recv,Server Queue," - << "Server Compute Input,Server Compute Infer," - << "Server Compute Output,"; - // Only include cache hit if enabled, keep out for backwards - // compatibility if disabled - if (parser_->ResponseCacheEnabled()) { - ofs << "Server Cache Hit,"; - ofs << "Server Cache Miss,"; - } - } - ofs << "Client Recv"; - for (const auto& percentile : - summary_[0].client_stats.percentile_latency_ns) { - ofs << ",p" << percentile.first << " latency"; - } - if (verbose_csv_) { - if (percentile_ == -1) { - ofs << ",Avg latency"; - } - ofs << ",request/response"; - ofs << ",response wait"; - if (should_output_metrics_) { - ofs << ",Avg GPU Utilization"; - ofs << ",Avg GPU Power Usage"; - ofs << ",Max GPU Memory Usage"; - ofs << ",Total GPU Memory"; - } - } - ofs << std::endl; - - // Sort summary results in order of increasing infer/sec. - std::sort( - summary_.begin(), summary_.end(), - [](const pa::PerfStatus& a, const pa::PerfStatus& b) -> bool { - return a.client_stats.infer_per_sec < b.client_stats.infer_per_sec; - }); - - for (pa::PerfStatus& status : summary_) { - if (target_concurrency_) { - ofs << status.concurrency << ","; - } else { - ofs << status.request_rate << ","; - } - - ofs << status.client_stats.infer_per_sec << ","; - if (parser_->IsDecoupled()) { - ofs << status.client_stats.responses_per_sec << ","; - } - ofs << (status.client_stats.avg_send_time_ns / 1000) << ","; - if (include_server_stats_) { - uint64_t avg_queue_ns = status.server_stats.queue_count > 0 - ? (status.server_stats.queue_time_ns / - status.server_stats.queue_count) - : 0; - uint64_t avg_compute_input_ns = - status.server_stats.compute_input_count > 0 - ? (status.server_stats.compute_input_time_ns / - status.server_stats.compute_input_count) - : 0; - uint64_t avg_compute_infer_ns = - status.server_stats.compute_infer_count > 0 - ? 
(status.server_stats.compute_infer_time_ns / - status.server_stats.compute_infer_count) - : 0; - uint64_t avg_compute_output_ns = - status.server_stats.compute_output_count > 0 - ? (status.server_stats.compute_output_time_ns / - status.server_stats.compute_output_count) - : 0; - uint64_t compute_time_ns = status.server_stats.compute_input_time_ns + - status.server_stats.compute_infer_time_ns + - status.server_stats.compute_output_time_ns; - if (status.server_stats.compute_input_count != - status.server_stats.compute_infer_count || - status.server_stats.compute_infer_count != - status.server_stats.compute_output_count) { - throw std::runtime_error( - "Server side statistics compute counts must be the same."); - } - uint64_t compute_cnt = status.server_stats.compute_input_count; - uint64_t avg_compute_ns = - compute_cnt > 0 ? compute_time_ns / compute_cnt : 0; - uint64_t avg_cache_hit_ns = - status.server_stats.cache_hit_count > 0 - ? (status.server_stats.cache_hit_time_ns / - status.server_stats.cache_hit_count) - : 0; - uint64_t avg_cache_miss_ns = - status.server_stats.cache_miss_count > 0 - ? (status.server_stats.cache_miss_time_ns / - status.server_stats.cache_miss_count) - : 0; - - uint64_t avg_client_wait_ns = status.client_stats.avg_latency_ns - - status.client_stats.avg_send_time_ns - - status.client_stats.avg_receive_time_ns; - // Network misc is calculated by subtracting data from different - // measurements (server v.s. client), so the result needs to be capped - // at 0 - uint64_t avg_accounted_time = avg_queue_ns + avg_compute_ns + - avg_cache_hit_ns + avg_cache_miss_ns; - uint64_t avg_network_misc_ns = - avg_client_wait_ns > avg_accounted_time - ? (avg_client_wait_ns - avg_accounted_time) - : 0; - - if (avg_network_misc_ns == 0) { - std::cerr << "Server average accounted time was larger than client " - "average wait time due to small sample size. Increase " - "the measurement interval with `--measurement-interval`." - << std::endl; - } - - ofs << (avg_network_misc_ns / 1000) << "," << (avg_queue_ns / 1000) - << "," << (avg_compute_input_ns / 1000) << "," - << (avg_compute_infer_ns / 1000) << "," - << (avg_compute_output_ns / 1000) << ","; - - if (parser_->ResponseCacheEnabled()) { - ofs << (avg_cache_hit_ns / 1000) << ","; - ofs << (avg_cache_miss_ns / 1000) << ","; - } - } - ofs << (status.client_stats.avg_receive_time_ns / 1000); - for (const auto& percentile : status.client_stats.percentile_latency_ns) { - ofs << "," << (percentile.second / 1000); - } - if (verbose_csv_) { - const uint64_t avg_latency_us = - status.client_stats.avg_latency_ns / 1000; - const uint64_t avg_send_time_us = - status.client_stats.avg_send_time_ns / 1000; - const uint64_t avg_receive_time_us = - status.client_stats.avg_receive_time_ns / 1000; - const uint64_t avg_request_time_us = - status.client_stats.avg_request_time_ns / 1000; - const uint64_t avg_response_wait_time_us = - avg_request_time_us - avg_send_time_us - avg_receive_time_us; - if (percentile_ == -1) { - ofs << "," << avg_latency_us; - } - ofs << "," << std::to_string(avg_send_time_us + avg_receive_time_us); - ofs << "," << std::to_string(avg_response_wait_time_us); - if (should_output_metrics_) { - if (status.metrics.size() == 1) { - WriteGpuMetrics(ofs, status.metrics[0]); - } else { - throw PerfAnalyzerException( - "There should only be one entry in the metrics vector.", - GENERIC_ERROR); - } - } - } - ofs << std::endl; - } - ofs.close(); - - if (include_server_stats_) { - // Record composing model stat in a separate file. 
- if (!summary_.front().server_stats.composing_models_stat.empty()) { - // For each of the composing model, generate CSV file in the same - // format as the one for ensemble. - for (const auto& model_identifier : - summary_[0].server_stats.composing_models_stat) { - const auto& name = model_identifier.first.first; - const auto& version = model_identifier.first.second; - const auto name_ver = name + "_v" + version; - - std::ofstream ofs(name_ver + "." + filename_, std::ofstream::out); - if (target_concurrency_) { - ofs << "Concurrency,"; - } else { - ofs << "Request Rate,"; - } - ofs << "Inferences/Second,Client Send," - << "Network+Server Send/Recv,Server Queue," - << "Server Compute Input,Server Compute Infer," - << "Server Compute Output,"; - - // Only include cache hit if enabled, keep out for backwards - // compatibility if disabled - if (parser_->ResponseCacheEnabled()) { - ofs << "Server Cache Hit,"; - ofs << "Server Cache Miss,"; - } - ofs << "Client Recv" << std::endl; - - for (pa::PerfStatus& status : summary_) { - auto it = status.server_stats.composing_models_stat.find( - model_identifier.first); - const auto& stats = it->second; - uint64_t avg_queue_ns = - stats.queue_count > 0 ? stats.queue_time_ns / stats.queue_count - : 0; - uint64_t avg_compute_input_ns = - stats.compute_input_count > 0 - ? stats.compute_input_time_ns / stats.compute_input_count - : 0; - uint64_t avg_compute_infer_ns = - stats.compute_infer_count > 0 - ? stats.compute_infer_time_ns / stats.compute_infer_count - : 0; - uint64_t avg_compute_output_ns = - stats.compute_output_count > 0 - ? stats.compute_output_time_ns / stats.compute_output_count - : 0; - uint64_t compute_time_ns = stats.compute_input_time_ns + - stats.compute_infer_time_ns + - stats.compute_output_time_ns; - if (stats.compute_input_count != stats.compute_infer_count || - stats.compute_infer_count != stats.compute_output_count) { - throw std::runtime_error( - "Server side statistics compute counts must be the same."); - } - uint64_t compute_cnt = stats.compute_input_count; - uint64_t avg_compute_ns = - compute_cnt > 0 ? compute_time_ns / compute_cnt : 0; - uint64_t avg_cache_hit_ns = - stats.cache_hit_count > 0 - ? stats.cache_hit_time_ns / stats.cache_hit_count - : 0; - uint64_t avg_cache_miss_ns = - stats.cache_miss_count > 0 - ? stats.cache_miss_time_ns / stats.cache_miss_count - : 0; - - uint64_t avg_overhead_ns = - stats.success_count > 0 - ? stats.cumm_time_ns / stats.success_count - : 0; - const uint64_t avg_accounted_time = avg_queue_ns + avg_compute_ns + - avg_cache_hit_ns + - avg_cache_miss_ns; - avg_overhead_ns = (avg_overhead_ns > avg_accounted_time) - ? (avg_overhead_ns - avg_accounted_time) - : 0; - - if (avg_overhead_ns == 0) { - std::cerr - << "Server average accounted time was larger than client " - "average wait time due to small sample size. Increase " - "the measurement interval with `--measurement-interval`." - << std::endl; - } - - // infer / sec of the composing model is calculated using the - // request count ratio between the composing model and the - // ensemble - double infer_ratio = status.server_stats.success_count > 0 - ? 
(1.0 * stats.success_count / - status.server_stats.success_count) - : 0.0; - double infer_per_sec = - infer_ratio * status.client_stats.infer_per_sec; - if (target_concurrency_) { - ofs << status.concurrency << ","; - } else { - ofs << status.request_rate << ","; - } - ofs << infer_per_sec << ",0," << (avg_overhead_ns / 1000) << "," - << (avg_queue_ns / 1000) << "," << (avg_compute_input_ns / 1000) - << "," << (avg_compute_infer_ns / 1000) << "," - << (avg_compute_output_ns / 1000) << ","; - - // Only include cache hit if enabled, keep out for backwards - // compatibility if disabled - if (parser_->ResponseCacheEnabled()) { - ofs << (avg_cache_hit_ns / 1000) << ","; - ofs << (avg_cache_miss_ns / 1000) << ","; - } - // Client recv - ofs << "0" << std::endl; - } - } - ofs.close(); - } - } - } -} - -void -ReportWriter::WriteGpuMetrics(std::ostream& ofs, const Metrics& metric) -{ - auto& gpu_util_map = metric.gpu_utilization_per_gpu; - auto& gpu_power_usage_map = metric.gpu_power_usage_per_gpu; - auto& gpu_mem_usage_map = metric.gpu_memory_used_bytes_per_gpu; - auto& gpu_total_mem_map = metric.gpu_memory_total_bytes_per_gpu; - // Currently assume GPU metrics will be appended to existing line - ofs << ","; - for (auto& entry : gpu_util_map) { - ofs << entry.first << ":" << entry.second << ";"; - } - ofs << ","; - for (auto& entry : gpu_power_usage_map) { - ofs << entry.first << ":" << entry.second << ";"; - } - ofs << ","; - for (auto& entry : gpu_mem_usage_map) { - ofs << entry.first << ":" << entry.second << ";"; - } - ofs << ","; - for (auto& entry : gpu_total_mem_map) { - ofs << entry.first << ":" << entry.second << ";"; - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/report_writer.h b/src/c++/perf_analyzer/report_writer.h deleted file mode 100644 index eeb09c9a4..000000000 --- a/src/c++/perf_analyzer/report_writer.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
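`WriteGpuMetrics`, deleted above, appends one CSV cell per metric and encodes every GPU inside the cell as an `id:value;` pair. A tiny sketch of that cell format, assuming a `std::map<std::string, double>` per-GPU layout (the real `Metrics` containers may use different key and value types):

```cpp
#include <iostream>
#include <map>
#include <string>

// Sketch of the "gpu_id:value;" cell format the deleted WriteGpuMetrics
// appends for each per-GPU metric column.
void WriteMetricCell(
    std::ostream& os, const std::map<std::string, double>& per_gpu)
{
  os << ",";
  for (const auto& entry : per_gpu) {
    os << entry.first << ":" << entry.second << ";";
  }
}

int main() {
  std::map<std::string, double> utilization{{"GPU-0", 0.45}, {"GPU-1", 0.62}};
  WriteMetricCell(std::cout, utilization);  // prints: ,GPU-0:0.45;GPU-1:0.62;
  std::cout << "\n";
  return 0;
}
```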
-#pragma once - -#include - -#include "client_backend/client_backend.h" -#include "inference_profiler.h" -#include "metrics.h" -#include "model_parser.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestReportWriter; -#endif - -//============================================================================== -/// ReportWriter is a helper class to generate csv files from the profiled data. -/// -class ReportWriter { - public: - ~ReportWriter() = default; - - /// Create a ReportWriter that is responsible for generating csv output files. - /// \param filename Name of csv file. - /// \param target_concurrency Is there a concurrency range or request rate - /// range? - /// \param summary Returns the trace of the measurement along the - /// search path. - /// \param verbose_csv Print extra information for Model Analyzer - /// \param include_server_stats Are server stats included in output - /// \param percentile The percentile in terms of latency to be reported. - /// if it is a valid percentile value, the percentile latency will reported - /// and used as stable criteria instead of average latency. If it is -1, - /// average latency will be reported and used as stable criteria. - /// \param parser The ModelParse object which holds all the details about the - /// model. - /// \param writer Returns a new ReportWriter object. - /// \param should_output_metrics Whether server-side inference server metrics - /// should be output. - /// \return cb::Error object indicating success or failure. - static cb::Error Create( - const std::string& filename, const bool target_concurrency, - const std::vector& summary, const bool verbose_csv, - const bool include_server_stats, const int32_t percentile, - const std::shared_ptr& parser, - std::unique_ptr* writer, const bool should_output_metrics); - - void GenerateReport(); - - /// Output gpu metrics to a stream - /// \param ofs A stream to output the csv data - /// \param metric The metric container for a particular concurrency or request - /// rate - void WriteGpuMetrics(std::ostream& ofs, const Metrics& metric); - - private: - ReportWriter( - const std::string& filename, const bool target_concurrency, - const std::vector& summary, const bool verbose_csv, - const bool include_server_stats, const int32_t percentile, - const std::shared_ptr& parser, - const bool should_output_metrics); - - - const std::string& filename_{""}; - const bool target_concurrency_{true}; - const bool include_server_stats_{true}; - const bool verbose_csv_{true}; - const int32_t percentile_{90}; - std::vector summary_{}; - const std::shared_ptr& parser_{nullptr}; - const bool should_output_metrics_{false}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestReportWriter; - - public: - ReportWriter() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/request_rate_manager.cc b/src/c++/perf_analyzer/request_rate_manager.cc deleted file mode 100644 index be12282ab..000000000 --- a/src/c++/perf_analyzer/request_rate_manager.cc +++ /dev/null @@ -1,305 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "request_rate_manager.h" - -namespace triton { namespace perfanalyzer { - -RequestRateManager::~RequestRateManager() -{ - // The destruction of derived class should wait for all the request generator - // threads to finish - StopWorkerThreads(); -} - -cb::Error -RequestRateManager::Create( - const bool async, const bool streaming, - const uint64_t measurement_window_ms, const size_t max_trials, - Distribution request_distribution, const int32_t batch_size, - const size_t max_threads, const uint32_t num_of_sequences, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory, - std::unique_ptr* manager, - const std::unordered_map& - request_parameters) -{ - std::unique_ptr local_manager(new RequestRateManager( - async, streaming, request_distribution, batch_size, measurement_window_ms, - max_trials, max_threads, num_of_sequences, shared_memory_type, - output_shm_size, serial_sequences, parser, factory, request_parameters)); - - *manager = std::move(local_manager); - - return cb::Error::Success; -} - -RequestRateManager::RequestRateManager( - const bool async, const bool streaming, Distribution request_distribution, - int32_t batch_size, const uint64_t measurement_window_ms, - const size_t max_trials, const size_t max_threads, - const uint32_t num_of_sequences, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, const bool serial_sequences, - const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters) - : LoadManager( - async, streaming, batch_size, max_threads, shared_memory_type, - output_shm_size, parser, factory, request_parameters), - request_distribution_(request_distribution), execute_(false), - num_of_sequences_(num_of_sequences), serial_sequences_(serial_sequences) -{ - gen_duration_.reset(new std::chrono::nanoseconds( - max_trials * measurement_window_ms * NANOS_PER_MILLIS)); - - threads_config_.reserve(max_threads); -} - -void -RequestRateManager::InitManagerFinalize() -{ - if (on_sequence_model_) { - sequence_manager_->InitSequenceStatuses(num_of_sequences_); - } -} - -cb::Error -RequestRateManager::ChangeRequestRate( - const double request_rate, const size_t request_count) -{ - PauseWorkers(); - 
ConfigureThreads(request_count); - // Can safely update the schedule - GenerateSchedule(request_rate); - ResumeWorkers(); - - return cb::Error::Success; -} - -void -RequestRateManager::GenerateSchedule(const double request_rate) -{ - std::chrono::nanoseconds max_duration; - std::function distribution; - - if (request_distribution_ == Distribution::POISSON) { - distribution = ScheduleDistribution(request_rate); - // Poisson distribution needs to generate a schedule for the maximum - // possible duration to make sure that it is as random and as close to the - // desired rate as possible - max_duration = *gen_duration_; - } else if (request_distribution_ == Distribution::CONSTANT) { - distribution = ScheduleDistribution(request_rate); - // Constant distribution only needs one entry per worker -- that one value - // can be repeated over and over to emulate a full schedule of any length - max_duration = std::chrono::nanoseconds(1); - } else { - return; - } - - auto worker_schedules = CreateWorkerSchedules(max_duration, distribution); - GiveSchedulesToWorkers(worker_schedules); -} - -std::vector -RequestRateManager::CreateWorkerSchedules( - std::chrono::nanoseconds max_duration, - std::function distribution) -{ - std::mt19937 schedule_rng; - - std::vector worker_schedules = - CreateEmptyWorkerSchedules(); - std::vector thread_ids{CalculateThreadIds()}; - - std::chrono::nanoseconds next_timestamp(0); - size_t thread_id_index = 0; - size_t worker_index = 0; - - - // Generate schedule until we hit max_duration, but also make sure that all - // worker schedules follow the thread id distribution - // - while (next_timestamp < max_duration || - thread_id_index % thread_ids.size() != 0) { - next_timestamp = next_timestamp + distribution(schedule_rng); - worker_index = thread_ids[thread_id_index]; - thread_id_index = ++thread_id_index % thread_ids.size(); - worker_schedules[worker_index]->intervals.emplace_back(next_timestamp); - } - - SetScheduleDurations(worker_schedules); - - return worker_schedules; -} - -std::vector -RequestRateManager::CreateEmptyWorkerSchedules() -{ - std::vector worker_schedules; - for (size_t i = 0; i < workers_.size(); i++) { - worker_schedules.push_back(std::make_shared()); - } - return worker_schedules; -} - -std::vector -RequestRateManager::CalculateThreadIds() -{ - std::vector thread_ids{}; - // Determine number of ids to loop over for time stamps - size_t num_ids = 0; - if (on_sequence_model_) { - num_ids = num_of_sequences_; - } else { - num_ids = max_threads_; - } - - for (size_t i = 0; i < num_ids; i++) { - size_t t = i % DetermineNumThreads(); - thread_ids.push_back(t); - } - return thread_ids; -} - -void -RequestRateManager::SetScheduleDurations( - std::vector& schedules) -{ - RateSchedulePtr_t last_schedule = schedules.back(); - - std::chrono::nanoseconds duration = last_schedule->intervals.back(); - - for (auto schedule : schedules) { - duration = std::max(schedule->intervals.back(), duration); - } - - for (auto schedule : schedules) { - schedule->duration = duration; - } -} - - -void -RequestRateManager::GiveSchedulesToWorkers( - const std::vector& worker_schedules) -{ - for (size_t i = 0; i < workers_.size(); i++) { - auto w = std::dynamic_pointer_cast(workers_[i]); - w->SetSchedule(worker_schedules[i]); - } -} - -void -RequestRateManager::PauseWorkers() -{ - // Pause all the threads - execute_ = false; - - // Wait to see all threads are paused. 
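
GenerateSchedule and CreateWorkerSchedules above build per-worker timestamp schedules by accumulating inter-request gaps drawn from either a Poisson process (exponentially distributed gaps) or a constant distribution, then handing the timestamps out to workers round-robin. A minimal sketch of that idea, using illustrative names rather than the deleted ScheduleDistribution helper:

```cpp
#include <chrono>
#include <cstdint>
#include <functional>
#include <iostream>
#include <random>
#include <vector>

using std::chrono::nanoseconds;

// Returns a generator of inter-request gaps for the requested rate.
// Poisson arrivals -> exponentially distributed gaps; constant rate -> fixed gap.
std::function<nanoseconds(std::mt19937&)>
MakeIntervalGenerator(double request_rate, bool poisson)
{
  if (poisson) {
    return [dist = std::exponential_distribution<double>(request_rate)](
               std::mt19937& rng) mutable {
      return nanoseconds(static_cast<int64_t>(dist(rng) * 1e9));
    };
  }
  return [gap = nanoseconds(static_cast<int64_t>(1e9 / request_rate))](
             std::mt19937&) { return gap; };
}

int
main()
{
  std::mt19937 rng;
  auto next_gap = MakeIntervalGenerator(100.0 /* req/s */, /*poisson=*/true);

  nanoseconds t{0};
  std::vector<nanoseconds> schedule;
  for (int i = 0; i < 5; i++) {
    t += next_gap(rng);
    schedule.push_back(t);  // absolute send times relative to the start time
  }
  for (auto ts : schedule) std::cout << ts.count() << " ns\n";
  return 0;
}
```
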
- for (auto& thread_config : threads_config_) { - while (!thread_config->is_paused_) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } - } -} - -void -RequestRateManager::ConfigureThreads(const size_t request_count) -{ - if (threads_.empty()) { - size_t num_of_threads = DetermineNumThreads(); - while (workers_.size() < num_of_threads) { - // Launch new thread for inferencing - threads_stat_.emplace_back(new ThreadStat()); - threads_config_.emplace_back(new ThreadConfig(workers_.size())); - - workers_.push_back( - MakeWorker(threads_stat_.back(), threads_config_.back())); - } - // Compute the number of sequences for each thread (take floor) - // and spread the remaining value - size_t avg_num_seqs = num_of_sequences_ / workers_.size(); - size_t num_seqs_add_one = num_of_sequences_ % workers_.size(); - size_t seq_offset = 0; - - size_t avg_req_count = request_count / workers_.size(); - size_t req_count_add_one = request_count % workers_.size(); - - - for (size_t i = 0; i < workers_.size(); i++) { - size_t num_of_seq = avg_num_seqs + (i < num_seqs_add_one ? 1 : 0); - threads_config_[i]->num_sequences_ = num_of_seq; - threads_config_[i]->seq_stat_index_offset_ = seq_offset; - seq_offset += num_of_seq; - - size_t thread_num_reqs = avg_req_count + (i < req_count_add_one ? 1 : 0); - threads_config_[i]->num_requests_ = thread_num_reqs; - - threads_.emplace_back(&IWorker::Infer, workers_[i]); - } - } -} - -void -RequestRateManager::ResumeWorkers() -{ - // Update the start_time_ to point to current time - start_time_ = std::chrono::steady_clock::now(); - - // Wake up all the threads to begin execution - execute_ = true; - wake_signal_.notify_all(); -} - -std::shared_ptr -RequestRateManager::MakeWorker( - std::shared_ptr thread_stat, - std::shared_ptr thread_config) -{ - size_t id = workers_.size(); - size_t num_of_threads = DetermineNumThreads(); - return std::make_shared( - id, thread_stat, thread_config, parser_, data_loader_, factory_, - on_sequence_model_, async_, num_of_threads, using_json_data_, streaming_, - batch_size_, wake_signal_, wake_mutex_, execute_, start_time_, - serial_sequences_, infer_data_manager_, sequence_manager_); -} - -size_t -RequestRateManager::DetermineNumThreads() -{ - size_t num_of_threads = max_threads_; - if (on_sequence_model_) { - num_of_threads = std::min(max_threads_, num_of_sequences_); - } - return num_of_threads; -} - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/request_rate_manager.h b/src/c++/perf_analyzer/request_rate_manager.h deleted file mode 100644 index 8c9131bb4..000000000 --- a/src/c++/perf_analyzer/request_rate_manager.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
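
ConfigureThreads above splits both the sequence count and the requested request_count across workers by flooring the average and handing one extra item to the first "remainder" workers, so 10 sequences over 4 workers become 3, 3, 2, 2. A small self-contained sketch of that split:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Split `total` items across `workers` so the counts differ by at most one.
std::vector<size_t>
SplitEvenly(size_t total, size_t workers)
{
  std::vector<size_t> counts(workers, total / workers);
  for (size_t i = 0; i < total % workers; i++) {
    counts[i]++;  // the first (total % workers) workers take one extra
  }
  return counts;
}

int
main()
{
  for (size_t c : SplitEvenly(10, 4)) std::cout << c << " ";  // prints: 3 3 2 2
  std::cout << "\n";
  return 0;
}
```
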
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "load_manager.h" -#include "request_rate_worker.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class TestRequestRateManager; -#endif - -//============================================================================== -/// RequestRateManager is a helper class to send inference requests to -/// inference server in accordance with a Poisson distribution. This -/// distribution models the real-world traffic patterns. -/// -/// An instance of this load manager will be created at the beginning of the -/// perf analyzer and it will be used to simulate load with different target -/// requests per second values and to collect per-request statistic. -/// -/// Detail: -/// Request Rate Manager will try to follow a pre-computed schedule while -/// issuing requests to the server and maintain a constant request rate. The -/// manager will spawn max_threads many worker thread to meet the timeline -/// imposed by the schedule. The worker threads will record the start time and -/// end time of each request into a shared vector which will be used to report -/// the observed latencies in serving requests. Additionally, they will report a -/// vector of the number of requests missed their schedule. -/// -class RequestRateManager : public LoadManager { - public: - ~RequestRateManager(); - - /// Create an object of realistic load manager that is responsible to maintain - /// specified load on inference server. - /// \param async Whether to use asynchronous or synchronous API for infer - /// request. - /// \param streaming Whether to use gRPC streaming API for infer request - /// \param measurement_window_ms The time window for measurements. - /// \param max_trials The maximum number of windows that will be measured - /// \param request_distribution The kind of distribution to use for drawing - /// out intervals between successive requests. - /// \param batch_size The batch size used for each request. - /// \param max_threads The maximum number of working threads to be spawned. - /// \param num_of_sequences The number of concurrent sequences that must be - /// maintained on the server. - /// \param string_length The length of the string to create for input. - /// \param string_data The data to use for generating string input. - /// \param zero_input Whether to fill the input tensors with zero. - /// \param user_data The vector containing path/paths to user-provided data - /// that can be a directory or path to a json data file. - /// \param shared_memory_type The type of shared memory to use for inputs. - /// \param output_shm_size The size of the shared memory to allocate for the - /// output. - /// \param serial_sequences Enable serial sequence mode. 
- /// \param parser The ModelParser object to get the model details. - /// \param factory The ClientBackendFactory object used to create - /// client to the server. - /// \param manager Returns a new ConcurrencyManager object. - /// \param request_parameters Custom request parameters to send to the server - /// \return cb::Error object indicating success or failure. - static cb::Error Create( - const bool async, const bool streaming, - const uint64_t measurement_window_ms, const size_t max_trials, - Distribution request_distribution, const int32_t batch_size, - const size_t max_threads, const uint32_t num_of_sequences, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory, - std::unique_ptr* manager, - const std::unordered_map& - request_parameters); - - /// Adjusts the rate of issuing requests to be the same as 'request_rate' - /// \param target_request_rate The rate at which requests must be issued to - /// the server. - /// \param request_count The number of requests to generate when profiling. If - /// 0, then there is no limit, and it will generate until told to stop. - /// \return cb::Error object indicating success or failure. - cb::Error ChangeRequestRate( - const double target_request_rate, const size_t request_count = 0); - - protected: - RequestRateManager( - const bool async, const bool streaming, Distribution request_distribution, - const int32_t batch_size, const uint64_t measurement_window_ms, - const size_t max_trials, const size_t max_threads, - const uint32_t num_of_sequences, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory, - const std::unordered_map& - request_parameters); - - void InitManagerFinalize() override; - - /// Generates and update the request schedule as per the given request rate. - /// \param request_rate The request rate to use for new schedule. - void GenerateSchedule(const double request_rate); - - std::vector CreateWorkerSchedules( - std::chrono::nanoseconds duration, - std::function distribution); - - std::vector CreateEmptyWorkerSchedules(); - - std::vector CalculateThreadIds(); - - void SetScheduleDurations(std::vector& schedules); - - void GiveSchedulesToWorkers( - const std::vector& worker_schedules); - - // Pauses the worker threads - void PauseWorkers(); - - void ConfigureThreads(const size_t request_count = 0); - - // Resets the counters and resumes the worker threads - void ResumeWorkers(); - - // Makes a new worker - virtual std::shared_ptr MakeWorker( - std::shared_ptr, std::shared_ptr); - - size_t DetermineNumThreads(); - - std::vector> threads_config_; - - std::shared_ptr gen_duration_; - Distribution request_distribution_; - std::chrono::steady_clock::time_point start_time_; - bool execute_; - const size_t num_of_sequences_{0}; - const bool serial_sequences_{false}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend TestRequestRateManager; - - public: - RequestRateManager() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/request_rate_worker.cc b/src/c++/perf_analyzer/request_rate_worker.cc deleted file mode 100644 index 48ccb361b..000000000 --- a/src/c++/perf_analyzer/request_rate_worker.cc +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "request_rate_worker.h" - -#include -#include - -#include "client_backend/client_backend.h" -#include "data_loader.h" -#include "perf_utils.h" - -namespace triton { namespace perfanalyzer { - -void -RequestRateWorker::Infer() -{ - CreateCtxIdTracker(); - CreateContexts(); - - // run inferencing until receiving exit signal to maintain server load. - do { - HandleExecuteOff(); - - bool is_delayed = SleepIfNecessary(); - uint32_t ctx_id = GetCtxId(); - SendInferRequest(ctx_id, is_delayed); - RestoreFreeCtxId(ctx_id); - - if (HandleExitConditions()) { - return; - } - - } while (true); -} - -void -RequestRateWorker::CreateCtxIdTracker() -{ - bool is_concurrency = false; - - ctx_id_tracker_ = CtxIdTrackerFactory::CreateTracker( - is_concurrency, on_sequence_model_, serial_sequences_); -} - -void -RequestRateWorker::CreateContexts() -{ - size_t active_ctx_cnt = - on_sequence_model_ ? 
thread_config_->num_sequences_ : 1; - while (ctxs_.size() < active_ctx_cnt) { - CreateContext(); - } - - ResetFreeCtxIds(); -} - -void -RequestRateWorker::ResetFreeCtxIds() -{ - std::lock_guard lock(cb_mtx_); - ctx_id_tracker_->Reset(ctxs_.size()); -} - -void -RequestRateWorker::SetSchedule(RateSchedulePtr_t schedule) -{ - schedule_ = schedule; -} - -std::chrono::nanoseconds -RequestRateWorker::GetNextTimestamp() -{ - return schedule_->Next(); -} - - -uint32_t -RequestRateWorker::GetSeqStatIndex(uint32_t ctx_id) -{ - return (thread_config_->seq_stat_index_offset_ + ctx_id); -} - -void -RequestRateWorker::HandleExecuteOff() -{ - // Should wait till main thread signals execution start - if (!execute_) { - CompleteOngoingSequences(); - WaitForOngoingRequests(); - - // Reset Ctx IDs because CompleteOngoingSequences() - // has destructive side affects - ResetFreeCtxIds(); - - // Wait if no request should be sent and it is not exiting - thread_config_->is_paused_ = true; - std::unique_lock lock(wake_mutex_); - wake_signal_.wait(lock, [this]() { return early_exit || execute_; }); - } - - thread_config_->is_paused_ = false; -} - -bool -RequestRateWorker::SleepIfNecessary() -{ - WaitForFreeCtx(); - - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); - std::chrono::nanoseconds next_timestamp = GetNextTimestamp(); - std::chrono::nanoseconds current_timestamp = now - start_time_; - std::chrono::nanoseconds wait_time = next_timestamp - current_timestamp; - - bool delayed = false; - if (wait_time.count() < 0) { - delayed = true; - } else { - thread_stat_->idle_timer.Start(); - std::this_thread::sleep_for(wait_time); - thread_stat_->idle_timer.Stop(); - } - return delayed; -} - -void -RequestRateWorker::WaitForFreeCtx() -{ - if (!ctx_id_tracker_->IsAvailable()) { - notified_ = false; - // wait for signal from callback. - std::unique_lock lk(cb_mtx_); - thread_stat_->idle_timer.Start(); - cb_cv_.wait(lk, [this] { - if (notified_) { - notified_ = false; - return true; - } - return false; - }); - thread_stat_->idle_timer.Stop(); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/request_rate_worker.h b/src/c++/perf_analyzer/request_rate_worker.h deleted file mode 100644 index e6d1804c6..000000000 --- a/src/c++/perf_analyzer/request_rate_worker.h +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
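
SleepIfNecessary above paces each worker by comparing the next scheduled timestamp with the time elapsed since start_time_, sleeping for the difference, or flagging the request as delayed when the schedule has already slipped. A compact sketch of the same pacing check, with illustrative names:

```cpp
#include <chrono>
#include <iostream>
#include <thread>

using Clock = std::chrono::steady_clock;
using std::chrono::nanoseconds;

// Sleep until `next_timestamp` (relative to `start`); return true if we are
// already past it, i.e. the request will be sent late ("delayed").
bool
PaceRequest(Clock::time_point start, nanoseconds next_timestamp)
{
  nanoseconds elapsed = Clock::now() - start;
  nanoseconds wait = next_timestamp - elapsed;
  if (wait.count() < 0) {
    return true;  // behind schedule
  }
  std::this_thread::sleep_for(wait);
  return false;
}

int
main()
{
  auto start = Clock::now();
  bool delayed = PaceRequest(start, nanoseconds(5'000'000));  // 5 ms from start
  std::cout << (delayed ? "delayed" : "on time") << "\n";
  return 0;
}
```
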
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -#include "ischeduler.h" -#include "load_worker.h" -#include "model_parser.h" -#include "sequence_manager.h" -#include "thread_config.h" - -namespace triton { namespace perfanalyzer { - - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockRequestRateWorker; -class TestRequestRateManager; -class TestCustomLoadManager; -#endif - -/// Worker thread for RequestRateManager -/// -/// If the model is non-sequence model, each worker uses only one context -/// to maintain concurrency assigned to worker. -/// If the model is sequence model, each worker has to use multiples contexts -/// to maintain (sequence) concurrency assigned to worker. -/// -class RequestRateWorker : public LoadWorker, public IScheduler { - public: - RequestRateWorker( - uint32_t id, std::shared_ptr thread_stat, - std::shared_ptr thread_config, - const std::shared_ptr parser, - std::shared_ptr data_loader, - const std::shared_ptr factory, - const bool on_sequence_model, const bool async, const size_t num_threads, - const bool using_json_data, const bool streaming, - const int32_t batch_size, std::condition_variable& wake_signal, - std::mutex& wake_mutex, bool& execute, - std::chrono::steady_clock::time_point& start_time, - const bool serial_sequences, - const std::shared_ptr& infer_data_manager, - std::shared_ptr sequence_manager) - : LoadWorker( - id, thread_stat, thread_config, parser, data_loader, factory, - on_sequence_model, async, streaming, batch_size, using_json_data, - wake_signal, wake_mutex, execute, infer_data_manager, - sequence_manager), - num_threads_(num_threads), start_time_(start_time), - serial_sequences_(serial_sequences) - { - } - - void Infer() override; - - /// Provides the schedule that should be followed - /// - void SetSchedule(RateSchedulePtr_t schedule) override; - - private: - RateSchedulePtr_t schedule_; - - const size_t num_threads_; - const bool serial_sequences_; - std::chrono::steady_clock::time_point& start_time_; - - void CreateCtxIdTracker(); - - std::chrono::nanoseconds GetNextTimestamp(); - - uint32_t GetSeqStatIndex(uint32_t ctx_id) override; - - void CreateContexts(); - - void HandleExecuteOff(); - void ResetFreeCtxIds(); - - // Sleep until it is time for the next part of the schedule - // Returns true if the request was delayed - bool SleepIfNecessary(); - - void WaitForFreeCtx(); - - void CreateContextFinalize(std::shared_ptr ctx) override - { - ctx->RegisterAsyncCallbackFinalize(std::bind( - &RequestRateWorker::AsyncCallbackFinalize, this, - std::placeholders::_1)); - - ctx->SetNumActiveThreads(num_threads_); - } - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockRequestRateWorker; - friend TestCustomLoadManager; - friend TestRequestRateManager; - -#endif -}; - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/request_record.h b/src/c++/perf_analyzer/request_record.h deleted file mode 100644 index 91b5ca19e..000000000 --- a/src/c++/perf_analyzer/request_record.h +++ /dev/null @@ -1,101 +0,0 @@ -// 
Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include - -namespace triton { namespace perfanalyzer { - -/// A record containing the data of a single request input or response output -struct RecordData { - RecordData(const uint8_t* buf, size_t size, std::string data_type = "") - { - uint8_t* array = new uint8_t[size]; - std::memcpy(array, buf, size); - data_ = std::shared_ptr(array, [](uint8_t* p) { delete[] p; }); - size_ = size; - data_type_ = data_type; - } - - // Define equality comparison operator so it can be inserted into maps - bool operator==(const RecordData& other) const - { - if (size_ != other.size_) - return false; - // Compare the contents of the arrays - return std::memcmp(data_.get(), other.data_.get(), size_) == 0; - } - - std::shared_ptr data_; - size_t size_; - std::string data_type_; -}; - - -/// A record of an individual request -struct RequestRecord { - using RequestInput = std::unordered_map; - using ResponseOutput = std::unordered_map; - - RequestRecord( - std::chrono::time_point start_time = - std::chrono::time_point(), - std::vector> - response_timestamps = {}, - std::vector request_inputs = {}, - std::vector response_outputs = {}, - bool sequence_end = true, bool delayed = false, uint64_t sequence_id = 0, - bool has_null_last_response = false) - : start_time_(start_time), response_timestamps_(response_timestamps), - request_inputs_(request_inputs), response_outputs_(response_outputs), - sequence_end_(sequence_end), delayed_(delayed), - sequence_id_(sequence_id), - has_null_last_response_(has_null_last_response) - { - } - // The timestamp of when the request was started. - std::chrono::time_point start_time_; - // Collection of response timestamps - std::vector> - response_timestamps_; - - std::vector request_inputs_; - std::vector response_outputs_; - // Whether or not the request is at the end of a sequence. - bool sequence_end_; - // Whether or not the request is delayed as per schedule. 
- bool delayed_; - // Sequence ID of the request - uint64_t sequence_id_; - // Whether the last response is null - bool has_null_last_response_; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/sequence_manager.cc b/src/c++/perf_analyzer/sequence_manager.cc deleted file mode 100644 index eaf5d6e00..000000000 --- a/src/c++/perf_analyzer/sequence_manager.cc +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
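
The RecordData type defined a little further above deep-copies the captured request or response bytes into a shared_ptr with an array deleter and compares records by size plus memcmp. A standalone re-sketch of that ownership-and-equality pattern (not the deleted header) before the sequence manager implementation resumes below:

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>

// Minimal restatement of the idea: own a byte buffer via shared_ptr with an
// array-aware deleter, and compare by size + contents.
struct OwnedBytes {
  OwnedBytes(const uint8_t* buf, size_t size) : size_(size)
  {
    uint8_t* copy = new uint8_t[size];
    std::memcpy(copy, buf, size);
    data_ = std::shared_ptr<uint8_t>(copy, [](uint8_t* p) { delete[] p; });
  }

  bool operator==(const OwnedBytes& other) const
  {
    return size_ == other.size_ &&
           std::memcmp(data_.get(), other.data_.get(), size_) == 0;
  }

  std::shared_ptr<uint8_t> data_;
  size_t size_;
};

int
main()
{
  uint8_t a[] = {1, 2, 3, 4};
  uint8_t b[] = {1, 2, 3, 4};
  OwnedBytes x(a, sizeof(a)), y(b, sizeof(b));
  std::cout << (x == y ? "equal" : "different") << "\n";  // prints: equal
  return 0;
}
```
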
- -#include "sequence_manager.h" - -namespace triton { namespace perfanalyzer { - -SequenceManager::SequenceManager( - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation, const bool using_json_data, - std::shared_ptr data_loader) - : start_sequence_id_(start_sequence_id), - sequence_id_range_(sequence_id_range), sequence_length_(sequence_length), - sequence_length_specified_(sequence_length_specified), - sequence_length_variation_(sequence_length_variation), - using_json_data_(using_json_data), data_loader_(data_loader) -{ - distribution_ = std::uniform_int_distribution( - 0, data_loader_->GetDataStreamsCount() - 1); -} - -void -SequenceManager::InitSequenceStatuses(size_t num_sequence_statuses) -{ - sequence_statuses_.clear(); - for (size_t sequence_status_index{0}; - sequence_status_index < num_sequence_statuses; sequence_status_index++) { - sequence_statuses_.push_back(std::make_shared()); - } -} - -const uint64_t -SequenceManager::GetSequenceID(size_t sequence_status_index) const -{ - return sequence_statuses_.at(sequence_status_index)->seq_id_; -} - -std::mutex& -SequenceManager::GetMutex(size_t sequence_status_index) -{ - return sequence_statuses_.at(sequence_status_index)->mtx_; -} - -const uint64_t -SequenceManager::GetDataStreamID(size_t sequence_status_index) const -{ - return sequence_statuses_.at(sequence_status_index)->data_stream_id_; -} - -const size_t -SequenceManager::GetRemainingQueries(size_t sequence_status_index) const -{ - return sequence_statuses_.at(sequence_status_index)->remaining_queries_; -} - -void -SequenceManager::SetRemainingQueries( - size_t sequence_status_index, size_t remaining_queries) -{ - sequence_statuses_.at(sequence_status_index)->remaining_queries_ = - remaining_queries; -} - -void -SequenceManager::DecrementRemainingQueries(size_t sequence_status_index) -{ - sequence_statuses_.at(sequence_status_index)->remaining_queries_--; -} - -const size_t -SequenceManager::GetNumSequenceStatuses() const -{ - return sequence_statuses_.size(); -} - -void -SequenceManager::SetInferSequenceOptions( - const uint32_t seq_stat_index, std::unique_ptr& options) -{ - options->sequence_start_ = - (sequence_statuses_[seq_stat_index]->remaining_queries_ == 0); - - // New sequence must be initialized before setting the id. - if (options->sequence_start_) { - InitNewSequence(seq_stat_index); - } - options->sequence_id_ = sequence_statuses_[seq_stat_index]->seq_id_; - options->sequence_end_ = - (sequence_statuses_[seq_stat_index]->remaining_queries_ == 1); -} - -const size_t -SequenceManager::GetSequenceLength(size_t sequence_status_index) const -{ - return sequence_statuses_.at(sequence_status_index)->sequence_length_; -} - -void -SequenceManager::InitNewSequence(int seq_stat_index) -{ - sequence_statuses_[seq_stat_index]->seq_id_ = GetNextSeqId(seq_stat_index); - if (!using_json_data_) { - size_t new_length = GetRandomSequenceLength(sequence_length_variation_); - sequence_statuses_[seq_stat_index]->remaining_queries_ = - new_length == 0 ? 1 : new_length; - } else { - // Selecting next available data stream based on uniform distribution. 
- const uint64_t data_stream_id{GetNewDataStreamId()}; - sequence_statuses_[seq_stat_index]->data_stream_id_ = data_stream_id; - const size_t total_steps{data_loader_->GetTotalSteps(data_stream_id)}; - if (sequence_length_specified_) { - const size_t varied_sequence_length{ - GetRandomSequenceLength(sequence_length_variation_)}; - sequence_statuses_[seq_stat_index]->sequence_length_ = - varied_sequence_length; - } else { - sequence_statuses_[seq_stat_index]->sequence_length_ = total_steps; - } - sequence_statuses_[seq_stat_index]->remaining_queries_ = - sequence_statuses_[seq_stat_index]->sequence_length_; - } -} - -uint64_t -SequenceManager::GetNextSeqId(int seq_stat_index) -{ - uint64_t old_seq_id = sequence_statuses_[seq_stat_index]->seq_id_; - uint64_t next_seq_id = - curr_seq_id_++ % sequence_id_range_ + start_sequence_id_; - - // If the next sequence ID is still in use, reuse the same sequence ID - // that this sequence_status used last time - // - for (uint i = 0; i < sequence_statuses_.size(); i++) { - if (next_seq_id == sequence_statuses_[i]->seq_id_) { - next_seq_id = old_seq_id; - break; - } - } - return next_seq_id; -} - -size_t -SequenceManager::GetRandomSequenceLength(double offset_ratio) -{ - int random_offset = ((2.0 * rand() / double(RAND_MAX)) - 1.0) * offset_ratio / - 100.0 * sequence_length_; - if (int(sequence_length_) + random_offset <= 0) { - return 1; - } - return sequence_length_ + random_offset; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/sequence_manager.h b/src/c++/perf_analyzer/sequence_manager.h deleted file mode 100644 index c419a87f0..000000000 --- a/src/c++/perf_analyzer/sequence_manager.h +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
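
GetRandomSequenceLength above perturbs the base sequence length by a uniform offset of up to +/- offset_ratio percent and clamps the result to at least one; with a base length of 20 and a 20 percent variation, generated lengths fall in [16, 24]. A small sketch restating that computation:

```cpp
#include <cstddef>
#include <cstdlib>
#include <iostream>

// Illustrative restatement of the +/- variation logic above: draw an offset
// uniformly in [-offset_ratio%, +offset_ratio%] of base_length and clamp the
// result to a minimum of 1.
size_t
RandomSequenceLength(size_t base_length, double offset_ratio)
{
  int random_offset = ((2.0 * std::rand() / double(RAND_MAX)) - 1.0) *
                      offset_ratio / 100.0 * base_length;
  if (static_cast<int>(base_length) + random_offset <= 0) {
    return 1;
  }
  return base_length + random_offset;
}

int
main()
{
  std::srand(42);
  // With base_length = 20 and offset_ratio = 20, results fall in [16, 24].
  for (int i = 0; i < 5; i++) {
    std::cout << RandomSequenceLength(20, 20.0) << " ";
  }
  std::cout << "\n";
  return 0;
}
```
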
-#pragma once - -#include -#include -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "data_loader.h" -#include "sequence_status.h" - -namespace triton { namespace perfanalyzer { - -#ifndef DOCTEST_CONFIG_DISABLE -class NaggyMockSequenceManager; -#endif - -/// Manages operations related to preparing requests to sequence models. -/// -class SequenceManager { - public: - /// Constructs the sequence manager object. Involves initializing the - /// distribution for randomly assigning input data streams to new sequences. - /// \param start_sequence_id See associated data member description. - /// \param sequence_id_range See associated data member description. - /// \param sequence_length See associated data member description. - /// \param sequence_length_specified See associated data member description. - /// \param sequence_length_variation See associated data member description. - /// \param using_json_data See associated data member description. - /// \param data_loader See associated data member description. - /// \return The constructed sequence manager object. - /// - SequenceManager( - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation, const bool using_json_data, - std::shared_ptr data_loader); - - /// Initializes the sequence statuses data structure. - /// \param num_sequence_statuses The number of sequence status objects to - /// create. - /// - void InitSequenceStatuses(size_t num_sequence_statuses); - - /// Gets the sequence ID for the specified sequence status object. - /// \param sequence_status_index The index of the sequence status object. - /// \return The sequence ID for the specified sequence status object. - /// - const uint64_t GetSequenceID(size_t sequence_status_index) const; - - /// Gets a non-const reference to the mutex for the specified sequence status - /// object. - /// \param sequence_status_index The index of the sequence status object. - /// \return A non-const reference to the mutex for the specified sequence - /// status object. - /// - std::mutex& GetMutex(size_t sequence_status_index); - - /// Gets the data stream ID for the specified sequence status object. - /// \param sequence_status_index The index of the sequence status object. - /// \return The data stream ID for the specified sequence status object. - /// - const uint64_t GetDataStreamID(size_t sequence_status_index) const; - - /// Gets the remaining queries for the specified sequence status object. - /// \param sequence_status_index The index of the sequence status object. - /// \return The remaining queries for the specified sequence status object. - /// - const size_t GetRemainingQueries(size_t sequence_status_index) const; - - /// Sets the remaining queries for the specified sequence status object. - /// \param sequence_status_index The index of the sequence status object. - /// \param remaining_queries The new value of the remaining queries for the - /// specified sequence status object. - /// - void SetRemainingQueries( - size_t sequence_status_index, size_t remaining_queries); - - /// Decrements the remaining queries for the specified sequence status object. - /// \param sequence_status_index The index of the sequence status object. - /// - void DecrementRemainingQueries(size_t sequence_status_index); - - /// Gets the number of sequence status objects in the sequence statuses data - /// structure. 
- /// \param sequence_status_index The index of the sequence status object. - /// \return The number of sequence status objects in the sequence statuses - /// data structure. - /// - const size_t GetNumSequenceStatuses() const; - - /// Sets options related to a single request to a sequence model. - /// \param seq_stat_index The index for the sequence status object that is - /// having its options set. - /// \param options The options object for the request that is being prepared. - /// - virtual void SetInferSequenceOptions( - const uint32_t seq_stat_index, - std::unique_ptr& options); - - /// Gets the sequence length for the specified sequence status object. - /// \param sequence_status_index The index of the sequence status object. - /// \return The sequence length for the specified sequence status object. - /// - const size_t GetSequenceLength(size_t sequence_status_index) const; - - private: - /// Initializes values for a sequence status object. - /// \param seq_stat_index The index for the sequence status object that is - /// being initialized. - /// - virtual void InitNewSequence(int seq_stat_index); - - /// Determines an appropriate next sequence ID for a renewed sequence status - /// object. - /// \param seq_stat_index The index for the sequence for which a request is - /// being prepared. - /// \return The potentially new sequence ID to be used by a renewed sequence - /// status object. - /// - virtual uint64_t GetNextSeqId(int seq_stat_index); - - virtual uint64_t GetNewDataStreamId() - { - return distribution_(rng_generator_); - } - - /// Generates a random sequence length based on a threshold. - /// \param offset_ratio The offset ratio/threshold of the generated length. - /// \return A random sequence length. - /// - virtual size_t GetRandomSequenceLength(double offset_ratio); - - /// Data structure holding sequence status objects - /// - std::vector> sequence_statuses_{}; - - /// Current sequence id (for issuing new sequences) - /// - std::atomic curr_seq_id_{0}; - - /// Data loader to be used for various sequence operations. - /// - std::shared_ptr data_loader_{nullptr}; - - /// The starting sequence ID to be used for iterating through valid sequence - /// IDs. - /// - const uint64_t start_sequence_id_{0}; - - /// The maximum sequence ID to be used for iterating through valid sequence - /// IDs. - /// - const uint64_t sequence_id_range_{0}; - - /// The base length of new sequences. - /// - const size_t sequence_length_{0}; - - /// Whether the user specified the sequence length. - /// - const bool sequence_length_specified_{false}; - - /// The percentage variation in length of sequences using autogenerated data - /// as input. - /// - const double sequence_length_variation_{0.0}; - - /// Indicates whether to generate sequence request input data or read it from - /// a JSON file. - /// - const bool using_json_data_{false}; - - /// The distribution for randomly assigning new sequences a data stream in the - /// input data JSON. - /// - std::uniform_int_distribution distribution_; - - /// The random number generator for randomly assigning new sequences a data - /// stream in the input data JSON. 
- /// - std::default_random_engine rng_generator_{}; - -#ifndef DOCTEST_CONFIG_DISABLE - friend NaggyMockSequenceManager; - - public: - SequenceManager() = default; -#endif -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/sequence_status.h b/src/c++/perf_analyzer/sequence_status.h deleted file mode 100644 index 16ec3bf40..000000000 --- a/src/c++/perf_analyzer/sequence_status.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include - -namespace triton { namespace perfanalyzer { - -// Holds the status of the inflight sequence -struct SequenceStatus { - SequenceStatus(uint64_t seq_id = 0) - : seq_id_(seq_id), data_stream_id_(0), remaining_queries_(0) - { - } - // The unique correlation id allocated to the sequence - uint64_t seq_id_; - // The data stream id providing data for the sequence - uint64_t data_stream_id_; - // The number of queries remaining to complete the sequence - size_t remaining_queries_; - // The length of the sequence - size_t sequence_length_{0}; - // A lock to protect sequence data - std::mutex mtx_; -}; - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/tensor_data.h b/src/c++/perf_analyzer/tensor_data.h deleted file mode 100644 index 6f5cf7191..000000000 --- a/src/c++/perf_analyzer/tensor_data.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -namespace triton { namespace perfanalyzer { - -/// Data for one input or output tensor -/// -struct TensorData { - const uint8_t* data_ptr{nullptr}; - size_t batch1_size{0}; - bool is_valid{false}; - std::string name; -}; - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_command_line_parser.cc b/src/c++/perf_analyzer/test_command_line_parser.cc deleted file mode 100644 index 2d17bbc24..000000000 --- a/src/c++/perf_analyzer/test_command_line_parser.cc +++ /dev/null @@ -1,1904 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -#include - -#include - -#include "command_line_parser.h" -#include "doctest.h" -#include "perf_analyzer_exception.h" - -namespace triton { namespace perfanalyzer { - -inline void -CHECK_STRING(const char* name, const std::string& str, const std::string& val) -{ - CHECK_MESSAGE( - !str.compare(val), name, " expecting '", val, "', found '", str, "'"); -} - -inline void -CHECK_STRING(std::string act, std::string exp) -{ - CHECK_MESSAGE( - !act.compare(exp), "Expecting: '", exp, "', Found: '", act, "'"); -} - -std::string -CreateUsageMessage(const std::string& option_name, const std::string& msg) -{ - return "Failed to parse " + option_name + ". " + msg; -} - -// Performs a doc test check against all the individual parameters -// in a PAParams object. -// -// /param act actual object under test -// /param exp expected value for object -// -inline void -CHECK_PARAMS(PAParamsPtr act, PAParamsPtr exp) -{ - CHECK(act->verbose == exp->verbose); - CHECK(act->streaming == exp->streaming); - CHECK(act->extra_verbose == exp->extra_verbose); - CHECK(act->max_threads == exp->max_threads); - CHECK(act->max_threads_specified == exp->max_threads_specified); - CHECK(act->sequence_length == exp->sequence_length); - CHECK(act->percentile == exp->percentile); - REQUIRE(act->user_data.size() == exp->user_data.size()); - for (size_t i = 0; i < act->user_data.size(); i++) { - CHECK_STRING(act->user_data[i], exp->user_data[i]); - } - CHECK(act->input_shapes.size() == exp->input_shapes.size()); - for (auto act_shape : act->input_shapes) { - auto exp_shape = exp->input_shapes.find(act_shape.first); - REQUIRE_MESSAGE( - exp_shape != exp->input_shapes.end(), - "Unexpected input_shape: ", act_shape.first); - REQUIRE(act_shape.second.size() == exp_shape->second.size()); - for (size_t i = 0; i < act_shape.second.size(); i++) { - CHECK_MESSAGE( - act_shape.second[i] == exp_shape->second[i], - "Unexpected shape value for: ", act_shape.first, "[", i, "]"); - } - } - CHECK(act->measurement_window_ms == exp->measurement_window_ms); - CHECK(act->using_concurrency_range == exp->using_concurrency_range); - CHECK(act->concurrency_range.start == exp->concurrency_range.start); - CHECK(act->concurrency_range.end == exp->concurrency_range.end); - CHECK(act->concurrency_range.step == exp->concurrency_range.step); - CHECK(act->latency_threshold_ms == exp->latency_threshold_ms); - CHECK(act->stability_threshold == doctest::Approx(act->stability_threshold)); - CHECK(act->max_trials == exp->max_trials); - CHECK(act->zero_input == exp->zero_input); - CHECK(act->string_length == exp->string_length); - CHECK_STRING(act->string_data, exp->string_data); - CHECK(act->async == exp->async); - CHECK(act->forced_sync == exp->forced_sync); - CHECK(act->using_request_rate_range == exp->using_request_rate_range); - CHECK( - act->request_rate_range[0] == - doctest::Approx(exp->request_rate_range[0])); - CHECK( - act->request_rate_range[1] == - doctest::Approx(exp->request_rate_range[1])); - CHECK( - act->request_rate_range[2] == - doctest::Approx(exp->request_rate_range[2])); - CHECK(act->num_of_sequences == exp->num_of_sequences); - CHECK(act->search_mode == exp->search_mode); - CHECK(act->request_distribution == exp->request_distribution); - CHECK(act->using_custom_intervals == exp->using_custom_intervals); - CHECK_STRING(act->request_intervals_file, exp->request_intervals_file); - CHECK(act->shared_memory_type == exp->shared_memory_type); - CHECK(act->output_shm_size == exp->output_shm_size); - CHECK(act->kind == exp->kind); - 
CHECK_STRING(act->model_signature_name, exp->model_signature_name); - CHECK(act->using_grpc_compression == exp->using_grpc_compression); - CHECK(act->compression_algorithm == exp->compression_algorithm); - CHECK(act->measurement_mode == exp->measurement_mode); - CHECK(act->measurement_request_count == exp->measurement_request_count); - CHECK_STRING(act->triton_server_path, exp->triton_server_path); - CHECK_STRING(act->model_repository_path, exp->model_repository_path); - CHECK(act->start_sequence_id == exp->start_sequence_id); - CHECK(act->sequence_id_range == exp->sequence_id_range); - CHECK_STRING( - act->ssl_options.ssl_grpc_certificate_chain_file, - exp->ssl_options.ssl_grpc_certificate_chain_file); - CHECK_STRING( - act->ssl_options.ssl_grpc_private_key_file, - exp->ssl_options.ssl_grpc_private_key_file); - CHECK_STRING( - act->ssl_options.ssl_grpc_root_certifications_file, - exp->ssl_options.ssl_grpc_root_certifications_file); - CHECK(act->ssl_options.ssl_grpc_use_ssl == exp->ssl_options.ssl_grpc_use_ssl); - CHECK_STRING( - act->ssl_options.ssl_https_ca_certificates_file, - exp->ssl_options.ssl_https_ca_certificates_file); - CHECK_STRING( - act->ssl_options.ssl_https_client_certificate_file, - exp->ssl_options.ssl_https_client_certificate_file); - CHECK_STRING( - act->ssl_options.ssl_https_client_certificate_type, - exp->ssl_options.ssl_https_client_certificate_type); - CHECK_STRING( - act->ssl_options.ssl_https_private_key_file, - exp->ssl_options.ssl_https_private_key_file); - CHECK_STRING( - act->ssl_options.ssl_https_private_key_type, - exp->ssl_options.ssl_https_private_key_type); - CHECK( - act->ssl_options.ssl_https_verify_host == - exp->ssl_options.ssl_https_verify_host); - CHECK( - act->ssl_options.ssl_https_verify_peer == - exp->ssl_options.ssl_https_verify_peer); - CHECK(act->verbose_csv == exp->verbose_csv); - CHECK(act->enable_mpi == exp->enable_mpi); - CHECK(act->trace_options.size() == exp->trace_options.size()); - CHECK(act->using_old_options == exp->using_old_options); - CHECK(act->dynamic_concurrency_mode == exp->dynamic_concurrency_mode); - CHECK(act->url_specified == exp->url_specified); - CHECK_STRING(act->url, exp->url); - CHECK_STRING(act->model_name, exp->model_name); - CHECK_STRING(act->model_version, exp->model_version); - CHECK(act->batch_size == exp->batch_size); - CHECK(act->using_batch_size == exp->using_batch_size); - CHECK(act->concurrent_request_count == exp->concurrent_request_count); - CHECK(act->protocol == exp->protocol); - CHECK(act->http_headers->size() == exp->http_headers->size()); - CHECK(act->max_concurrency == exp->max_concurrency); - CHECK_STRING(act->filename, act->filename); - CHECK(act->mpi_driver != nullptr); - CHECK_STRING(act->memory_type, exp->memory_type); - CHECK( - act->is_using_periodic_concurrency_mode == - exp->is_using_periodic_concurrency_mode); - CHECK( - act->periodic_concurrency_range.start == - exp->periodic_concurrency_range.start); - CHECK( - act->periodic_concurrency_range.end == - exp->periodic_concurrency_range.end); - CHECK( - act->periodic_concurrency_range.step == - exp->periodic_concurrency_range.step); - CHECK(act->request_period == exp->request_period); - CHECK(act->request_parameters.size() == exp->request_parameters.size()); - for (auto act_param : act->request_parameters) { - auto exp_param = exp->request_parameters.find(act_param.first); - REQUIRE_MESSAGE( - exp_param != exp->request_parameters.end(), - "Unexpected parameter: ", act_param.first); - - CHECK(act_param.second.value == 
exp_param->second.value); - CHECK(act_param.second.type == exp_param->second.type); - } -} - - -#define CHECK_INT_OPTION(option_name, exp_val, msg) \ - SUBCASE("valid value") \ - { \ - int argc = 5; \ - char* argv[argc] = {app_name, "-m", model_name, option_name, "2000"}; \ - CAPTURE(argv[3]); \ - CAPTURE(argv[4]); \ - \ - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); \ - CHECK(!parser.UsageCalled()); \ - CAPTURE(parser.GetUsageMessage()); \ - \ - exp_val = 2000; \ - CAPTURE(exp_val); \ - } \ - \ - SUBCASE("negative value") \ - { \ - int argc = 5; \ - char* argv[argc] = {app_name, "-m", model_name, option_name, "-2000"}; \ - CHECK_THROWS_WITH_AS( \ - act = parser.Parse(argc, argv), msg.c_str(), PerfAnalyzerException); \ - \ - check_params = false; \ - } \ - \ - SUBCASE("floating point value") \ - { \ - int argc = 5; \ - char* argv[argc] = {app_name, "-m", model_name, option_name, "29.5"}; \ - \ - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); \ - CHECK(!parser.UsageCalled()); \ - \ - exp_val = 29; \ - } \ - \ - SUBCASE("missing value") \ - { \ - int argc = 4; \ - char* argv[argc] = {app_name, "-m", model_name, option_name}; \ - \ - CHECK_THROWS_WITH_AS( \ - act = parser.Parse(argc, argv), "", PerfAnalyzerException); \ - \ - check_params = false; \ - } - - -TEST_CASE("Testing PerfAnalyzerParameters") -{ - PAParamsPtr params(new PerfAnalyzerParameters{}); - - CHECK(params->verbose == false); - CHECK(params->streaming == false); - CHECK(params->extra_verbose == false); - CHECK(params->max_threads == 4); - CHECK(params->max_threads_specified == false); - CHECK(params->sequence_length == 20); - CHECK(params->percentile == -1); - CHECK(params->request_count == 0); - CHECK(params->user_data.size() == 0); - CHECK_STRING("endpoint", params->endpoint, ""); - CHECK(params->input_shapes.size() == 0); - CHECK(params->measurement_window_ms == 5000); - CHECK(params->using_concurrency_range == false); - CHECK(params->concurrency_range.start == 1); - CHECK(params->concurrency_range.end == 1); - CHECK(params->concurrency_range.step == 1); - CHECK(params->latency_threshold_ms == NO_LIMIT); - CHECK(params->stability_threshold == doctest::Approx(0.1)); - CHECK(params->max_trials == 10); - CHECK(params->zero_input == false); - CHECK(params->string_length == 128); - CHECK_STRING("string_data", params->string_data, ""); - CHECK(params->async == false); - CHECK(params->forced_sync == false); - CHECK(params->using_request_rate_range == false); - CHECK(params->request_rate_range[0] == doctest::Approx(1.0)); - CHECK(params->request_rate_range[1] == doctest::Approx(1.0)); - CHECK(params->request_rate_range[2] == doctest::Approx(1.0)); - CHECK(params->num_of_sequences == 4); - CHECK(params->search_mode == SearchMode::LINEAR); - CHECK(params->request_distribution == Distribution::CONSTANT); - CHECK(params->using_custom_intervals == false); - CHECK_STRING("request_intervals_file", params->request_intervals_file, ""); - CHECK(params->shared_memory_type == NO_SHARED_MEMORY); - CHECK(params->output_shm_size == 102400); - CHECK(params->kind == clientbackend::BackendKind::TRITON); - CHECK_STRING( - "model_signature_name", params->model_signature_name, "serving_default"); - CHECK(params->using_grpc_compression == false); - CHECK( - params->compression_algorithm == - clientbackend::GrpcCompressionAlgorithm::COMPRESS_NONE); - CHECK(params->measurement_mode == MeasurementMode::TIME_WINDOWS); - CHECK(params->measurement_request_count == 50); - CHECK_STRING( - "triton_server_path", params->triton_server_path, 
"/opt/tritonserver"); - CHECK_STRING("model_repository_path", params->model_repository_path, ""); - CHECK(params->start_sequence_id == 1); - CHECK(params->sequence_id_range == UINT32_MAX); - CHECK_STRING( - "ssl_grpc_certificate_chain_file", - params->ssl_options.ssl_grpc_certificate_chain_file, ""); - CHECK_STRING( - "ssl_grpc_private_key_file", - params->ssl_options.ssl_grpc_private_key_file, ""); - CHECK_STRING( - "ssl_grpc_root_certifications_file", - params->ssl_options.ssl_grpc_root_certifications_file, ""); - CHECK(params->ssl_options.ssl_grpc_use_ssl == false); - CHECK_STRING( - "ssl_https_ca_certificates_file", - params->ssl_options.ssl_https_ca_certificates_file, ""); - CHECK_STRING( - "ssl_https_client_certificate_file", - params->ssl_options.ssl_https_client_certificate_file, ""); - CHECK_STRING( - "ssl_https_client_certificate_type", - params->ssl_options.ssl_https_client_certificate_type, ""); - CHECK_STRING( - "ssl_https_private_key_file", - params->ssl_options.ssl_https_private_key_file, ""); - CHECK_STRING( - "ssl_https_private_key_type", - params->ssl_options.ssl_https_private_key_type, ""); - CHECK(params->ssl_options.ssl_https_verify_host == 2); - CHECK(params->ssl_options.ssl_https_verify_peer == 1); - CHECK(params->verbose_csv == false); - CHECK(params->enable_mpi == false); - CHECK(params->trace_options.size() == 0); - CHECK(params->using_old_options == false); - CHECK(params->dynamic_concurrency_mode == false); - CHECK(params->url_specified == false); - CHECK_STRING("url", params->url, "localhost:8000"); - CHECK_STRING("model_name", params->model_name, ""); - CHECK_STRING("model_version", params->model_version, ""); - CHECK(params->batch_size == 1); - CHECK(params->using_batch_size == false); - CHECK(params->concurrent_request_count == 1); - CHECK(params->protocol == clientbackend::ProtocolType::HTTP); - CHECK(params->http_headers->size() == 0); - CHECK(params->max_concurrency == 0); - CHECK_STRING("filename", params->filename, ""); - CHECK(params->mpi_driver == nullptr); - CHECK_STRING("memory_type", params->memory_type, "system"); -} - -// Test CLParser Class that captures the usage string but suppresses the output -// -class TestCLParser : public CLParser { - public: - std::string GetUsageMessage() const { return usage_message_; } - bool UsageCalled() const { return usage_called_; } - - private: - std::string usage_message_; - bool usage_called_ = false; - - virtual void Usage(const std::string& msg = std::string()) - { - throw PerfAnalyzerException(msg, GENERIC_ERROR); - } -}; - -void -CheckValidRange( - std::vector& args, char* option_name, TestCLParser& parser, - PAParamsPtr& act, bool& using_range, Range& range, - size_t* max_threads) -{ - SUBCASE("start:end provided") - { - *max_threads = 400; - args.push_back(option_name); - args.push_back("100:400"); // start:end - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - using_range = true; - range.start = 100; - range.end = 400; - } - - SUBCASE("start:end:step provided") - { - *max_threads = 400; - args.push_back(option_name); - args.push_back("100:400:10"); // start:end:step - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - using_range = true; - range.start = 100; - range.end = 400; - range.step = 10; - } -} - -void -CheckInvalidRange( - std::vector& 
args, char* option_name, TestCLParser& parser, - PAParamsPtr& act, bool& check_params) -{ - std::string expected_msg; - - SUBCASE("too many input values") - { - args.push_back(option_name); - args.push_back("200:100:25:10"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = CreateUsageMessage( - option_name, "The value does not match ."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("invalid start value") - { - args.push_back(option_name); - args.push_back("bad:400:10"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = - CreateUsageMessage(option_name, "Invalid value provided: bad:400:10"); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("invalid end value") - { - args.push_back(option_name); - args.push_back("100:bad:10"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = - CreateUsageMessage(option_name, "Invalid value provided: 100:bad:10"); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("invalid step value") - { - args.push_back(option_name); - args.push_back("100:400:bad"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = - CreateUsageMessage(option_name, "Invalid value provided: 100:400:bad"); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("no input values") - { - args.push_back(option_name); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - // BUG (TMA-1307): Usage message does not contain error. 
Error statement - // "option '--concurrency-range' requires an argument" written directly - // to std::out - // - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), "", PerfAnalyzerException); - - check_params = false; - } -} - - -TEST_CASE("Testing Command Line Parser") -{ - char* model_name = "my_model"; - char* app_name = "test_perf_analyzer"; - - std::string expected_msg; - std::vector args{app_name, "-m", model_name}; - - opterr = 1; // Enable error output for GetOpt library - bool check_params = true; - - TestCLParser parser; // Command Line parser under test - PAParamsPtr act; // Actual options parsed from parser - PAParamsPtr exp{new PerfAnalyzerParameters()}; // Expected results - - // Most common defaults - exp->model_name = model_name; // model_name; - exp->max_threads = DEFAULT_MAX_THREADS; - - SUBCASE("with no parameters") - { - int argc = 1; - char* argv[argc] = {app_name}; - - expected_msg = - CreateUsageMessage("-m (model name)", "The value must be specified."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("with min parameters") - { - int argc = 3; - char* argv[argc] = {app_name, "-m", model_name}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - REQUIRE(!parser.UsageCalled()); - } - - SUBCASE("Option : --streaming") - { - SUBCASE("streaming option - without model") - { - int argc = 2; - char* argv[argc] = {app_name, "--streaming"}; - - expected_msg = - CreateUsageMessage("-m (model name)", "The value must be specified."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("with model") - { - int argc = 4; - char* argv[argc] = {app_name, "-m", model_name, "--streaming"}; - - // NOTE: This is not an informative error message, how do I specify a gRPC - // protocol? Error output should list missing params. 
- // - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), - "Streaming is only allowed with gRPC protocol.", - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("with model last") - { - int argc = 4; - char* argv[argc] = {app_name, "--streaming", "-m", model_name}; - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), - "Streaming is only allowed with gRPC protocol.", - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --max-threads") - { - SUBCASE("set to 1") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--max-threads", "1"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - REQUIRE(!parser.UsageCalled()); - - exp->max_threads = 1; - exp->max_threads_specified = true; - } - - SUBCASE("set to max") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--max-threads", "65535"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - REQUIRE(!parser.UsageCalled()); - - exp->max_threads = 65535; - exp->max_threads_specified = true; - } - - SUBCASE("missing value") - { - int argc = 4; - char* argv[argc] = {app_name, "-m", model_name, "--max-threads"}; - - // NOTE: Empty message is not helpful - // - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), "", PerfAnalyzerException); - - // BUG: Dumping string "option '--max-threads' requires an argument" - // directly to std::out, instead of through usage() - // - check_params = false; - } - - SUBCASE("bad value") - { - int argc = 4; - char* argv[argc] = {app_name, "-m", model_name, "--max-threads", "bad"}; - - // NOTE: Empty message is not helpful - // - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), "", PerfAnalyzerException); - - // BUG: Dumping string "option '--max-threads' requires an argument" - // directly to std::out, instead of through usage() - // - check_params = false; - } - } - - SUBCASE("Option : --sequence-length") - { - SUBCASE("set to 2000") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-length", "2000"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->sequence_length = 2000; - } - - SUBCASE("set to 0") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--sequence-length", "0"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->sequence_length = 20; - } - } - - SUBCASE("Option : --sequence-length-variation") - { - SUBCASE("non-negative") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-length-variation", "33.3"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->sequence_length_variation = 33.3; - } - - SUBCASE("negative") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-length-variation", "-10"}; - - expected_msg = CreateUsageMessage( - "--sequence-length-variation", "The value must be >= 0.0."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --percentile") - { - SUBCASE("set to 25") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--percentile", "25"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->percentile = 25; - } - - SUBCASE("set to 225 - overflow check") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--percentile", "225"}; - - expected_msg = 
CreateUsageMessage( - "--percentile", - "The value must be -1 for not reporting or in range (0, 100)."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("set to -1 - use average latency") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--percentile", "-1"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->percentile = -1; - } - } - - SUBCASE("Option : --data-directory") - { - SUBCASE("set to `/usr/data`") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--data-directory", "/usr/data"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->user_data.push_back("/usr/data"); - } - - SUBCASE("call twice") - { - // QUESTION: Is this the expected behavior? There is not enough details in - // in the output. It is marked as deprecated, what does that mean? Is it - // used? - // - int argc = 7; - char* argv[argc] = {app_name, "-m", model_name, - "--data-directory", "/usr/data", "--data-directory", - "/another/dir"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->user_data.push_back("/usr/data"); - exp->user_data.push_back("/another/dir"); - } - } - - SUBCASE("Option : --sequence-id-range") - { - SUBCASE("One arg") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-id-range", "53"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->start_sequence_id = 53; - exp->sequence_id_range = UINT32_MAX; - } - SUBCASE("Two args") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-id-range", "53:67"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->start_sequence_id = 53; - exp->sequence_id_range = 14; - } - SUBCASE("Three args") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-id-range", "53:67:92"}; - - expected_msg = CreateUsageMessage( - "--sequence-id-range", "The value does not match ."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - SUBCASE("Not a number") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-id-range", "BAD"}; - - expected_msg = CreateUsageMessage( - "--sequence-id-range", "Invalid value provided: BAD"); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; // Usage message called - } - SUBCASE("Not a number 2") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--sequence-id-range", "53:BAD"}; - - expected_msg = CreateUsageMessage( - "--sequence-id-range", "Invalid value provided: 53:BAD"); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; // Usage message called - } - } - - - SUBCASE("Option : --input-tensor-format") - { - SUBCASE("binary") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--input-tensor-format", "binary"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->input_tensor_format = cb::TensorFormat::BINARY; - } - SUBCASE("json") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--input-tensor-format", "json"}; - - 
REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->input_tensor_format = cb::TensorFormat::JSON; - } - SUBCASE("invalid") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--input-tensor-format", "invalid"}; - - expected_msg = CreateUsageMessage( - "--input-tensor-format", - "Unsupported type provided: 'invalid'. The available options are " - "'binary' or 'json'."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - } - - - SUBCASE("Option : --shape") - { - SUBCASE("expected input, single shape") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--shape", "input_name:1,2,3"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->input_shapes.emplace( - std::string("input_name"), std::vector{1, 2, 3}); - } - - SUBCASE("expected input, multiple shapes") - { - int argc = 9; - char* argv[argc] = { - app_name, - "-m", - model_name, - "--shape", - "input_name:1,2,3", - "--shape", - "alpha:10,24", - "--shape", - "beta:10,200,34,15,9000"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->input_shapes.emplace( - std::string("input_name"), std::vector{1, 2, 3}); - exp->input_shapes.emplace( - std::string("alpha"), std::vector{10, 24}); - exp->input_shapes.emplace( - std::string("beta"), std::vector{10, 200, 34, 15, 9000}); - } - - SUBCASE("using negative dims") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--shape", "input_name:-1,2,3"}; - - expected_msg = CreateUsageMessage( - "--shape", "The dimensions of input tensor must be > 0."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("equals sign, not colon") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--shape", "input_name=-1,2,3"}; - - expected_msg = CreateUsageMessage( - "--shape", "There must be a colon after input name."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("missing shape") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--shape", "input_name"}; - - expected_msg = CreateUsageMessage( - "--shape", "There must be a colon after input name."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("missing colon") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--shape", "input_name1,2,3"}; - - expected_msg = CreateUsageMessage( - "--shape", "There must be a colon after input name."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("bad shapes - a,b,c") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--shape", "input_name:a,b,c"}; - - expected_msg = CreateUsageMessage( - "--shape", "Invalid value provided: input_name:a,b,c"); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; // Usage message called - } - - SUBCASE("bad shapes - [1,2,3]") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--shape", "input_name:[1,2,3]"}; - - expected_msg = CreateUsageMessage( - 
"--shape", "Invalid value provided: input_name:[1,2,3]"); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; // Usage message called - } - } - - SUBCASE("Option : --measurement-interval") - { - SUBCASE("set to 500") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "", "500"}; - - SUBCASE("Long form") - { - argv[3] = "--measurement-interval"; - } - - SUBCASE("Short form") - { - argv[3] = "-p"; - } - - CAPTURE(argv[3]); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->measurement_window_ms = 500; - } - - SUBCASE("set to -200") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "", "-200"}; - - SUBCASE("Long form") - { - argv[3] = "--measurement-interval"; - } - - SUBCASE("Short form") - { - argv[3] = "-p"; - } - - CAPTURE(argv[3]); - - expected_msg = CreateUsageMessage( - "--measurement-interval (-p)", "The value must be > 0 msec."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("set to non-numeric value") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "", "foobar"}; - - SUBCASE("Long form") - { - argv[3] = "--measurement-interval"; - expected_msg = CreateUsageMessage( - "--measurement-interval", "Invalid value provided: foobar"); - } - - SUBCASE("Short form") - { - argv[3] = "-p"; - expected_msg = - CreateUsageMessage("-p", "Invalid value provided: foobar"); - } - - CAPTURE(argv[3]); - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; // Usage message called - } - } - - SUBCASE("Option : --concurrency-range") - { - char* option_name = "--concurrency-range"; - uint64_t concurrency_range_start; - uint64_t concurrency_range_end; - - SUBCASE("start provided") - { - concurrency_range_start = 100; - std::string concurrency_range_str = - std::to_string(concurrency_range_start); - args.push_back(option_name); - args.push_back(concurrency_range_str.data()); // start - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->using_concurrency_range = true; - exp->concurrency_range.start = concurrency_range_start; - exp->max_threads = DEFAULT_MAX_THREADS; - } - - CheckValidRange( - args, option_name, parser, act, exp->using_concurrency_range, - exp->concurrency_range, &(exp->max_threads)); - CheckInvalidRange(args, option_name, parser, act, check_params); - - SUBCASE("wrong separator") - { - args.push_back(option_name); - args.push_back("100,400,10"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - // BUG (TMA-1307): Should detect this and through an error. User will - // enter this and have no clue why the end and step sizes are not used - // correctly. 
- // - - check_params = false; - } - - SUBCASE("invalid condition - end and latency threshold are 0") - { - args.push_back(option_name); - args.push_back("100:0:25"); - args.push_back("--latency-threshold"); - args.push_back("0"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), - "The end of the search range and the latency limit can not be both 0 " - "(or 0.0) simultaneously", - PerfAnalyzerException); - - check_params = false; - } - - concurrency_range_start = 10; - SUBCASE("Max threads set to default when concurrency-range.end < 16") - { - concurrency_range_end = 10; - std::string concurrency_range_str = - std::to_string(concurrency_range_start) + ":" + - std::to_string(concurrency_range_end); - args.push_back(option_name); - args.push_back(concurrency_range_str.data()); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->using_concurrency_range = true; - exp->concurrency_range.start = concurrency_range_start; - exp->concurrency_range.end = concurrency_range_end; - exp->max_threads = DEFAULT_MAX_THREADS; - } - - SUBCASE("Max_threads set to default when concurrency-range.end = 16") - { - concurrency_range_end = 16; - std::string concurrency_range_str = - std::to_string(concurrency_range_start) + ":" + - std::to_string(concurrency_range_end); - args.push_back(option_name); - args.push_back(concurrency_range_str.data()); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->using_concurrency_range = true; - exp->concurrency_range.start = concurrency_range_start; - exp->concurrency_range.end = concurrency_range_end; - exp->max_threads = DEFAULT_MAX_THREADS; - } - - SUBCASE( - "Max_threads set to concurrency-range.end when concurrency-range.end > " - "16") - { - concurrency_range_end = 40; - std::string concurrency_range_str = - std::to_string(concurrency_range_start) + ":" + - std::to_string(concurrency_range_end); - args.push_back(option_name); - args.push_back(concurrency_range_str.data()); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->using_concurrency_range = true; - exp->concurrency_range.start = concurrency_range_start; - exp->concurrency_range.end = concurrency_range_end; - exp->max_threads = exp->concurrency_range.end; - } - } - - SUBCASE("Option : --periodic-concurrency-range") - { - char* option_name = "--periodic-concurrency-range"; - - // Add required args that specifies where to dump profiled data - args.insert( - args.end(), {"-i", "grpc", "--async", "--streaming", - "--profile-export-file", "profile.json"}); - exp->protocol = cb::ProtocolType::GRPC; - exp->async = true; - exp->streaming = true; - exp->url = "localhost:8001"; // gRPC url - - SUBCASE("start provided") - { - args.push_back(option_name); - args.push_back("100"); // start - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = CreateUsageMessage( - option_name, "Both and values must be provided."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - exp->max_threads = 
400; - - CheckValidRange( - args, option_name, parser, act, exp->is_using_periodic_concurrency_mode, - exp->periodic_concurrency_range, &(exp->max_threads)); - - CheckInvalidRange(args, option_name, parser, act, check_params); - - SUBCASE("more than one load mode") - { - args.push_back(option_name); - args.push_back("100:400"); - args.push_back("--concurrency-range"); - args.push_back("10:40"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = - "Cannot specify more then one inference load mode. Please choose " - "only one of the following modes: --concurrency-range, " - "--periodic-concurrency-range, --request-rate-range, or " - "--request-intervals."; - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("no export file specified") - { - // Remove the export file args - args.pop_back(); - args.pop_back(); - - args.push_back(option_name); - args.push_back("100:400"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = - "Must provide --profile-export-file when using the " - "--periodic-concurrency-range option."; - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("step is not factor of range size") - { - args.push_back(option_name); - args.push_back("100:400:7"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = CreateUsageMessage( - option_name, - "The value must be a factor of the range size ( - " - ")."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("step is zero") - { - args.push_back(option_name); - args.push_back("10:400:0"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = - CreateUsageMessage(option_name, "The value must be > 0."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --request-period") - { - expected_msg = - CreateUsageMessage("--request-period", "The value must be > 0"); - CHECK_INT_OPTION("--request-period", exp->request_period, expected_msg); - - SUBCASE("set to 0") - { - args.push_back("--request-period"); - args.push_back("0"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --request-parameter") - { - char* option_name = "--request-parameter"; - - // Add required args that specifies where to dump profiled data - args.insert(args.end(), {"-i", "grpc", "--async", "--streaming"}); - exp->protocol = cb::ProtocolType::GRPC; - exp->async = true; - exp->streaming = true; - exp->url = "localhost:8001"; // gRPC url - - SUBCASE("valid parameter") - { - args.push_back(option_name); - args.push_back("max_tokens:256:int"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - cb::RequestParameter param; - param.value = "256"; - param.type = "int"; - exp->request_parameters["max_tokens"] = param; - } - - 
SUBCASE("missing type") - { - args.push_back(option_name); - args.push_back("max_tokens:256"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - expected_msg = CreateUsageMessage( - option_name, "The value does not match ."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --latency-threshold") - { - expected_msg = CreateUsageMessage( - "--latency-threshold (-l)", "The value must be >= 0 msecs."); - CHECK_INT_OPTION( - "--latency-threshold", exp->latency_threshold_ms, expected_msg); - - SUBCASE("set to 0") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--latency-threshold", "0"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - } - } - - SUBCASE("Option : --stability-percentage") - { - SUBCASE("valid value") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--stability-percentage", "80"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->stability_threshold = .8f; - } - - SUBCASE("set to 0") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--stability-percentage", "0"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - } - - SUBCASE("negative value") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--stability-percentage", "-20"}; - - expected_msg = CreateUsageMessage( - "--stability-percentage (-s)", "The value must be >= 0.0."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("floating point value") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--stability-percentage", "29.5"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->stability_threshold = .295f; - } - - SUBCASE("missing value") - { - int argc = 4; - char* argv[argc] = {app_name, "-m", model_name, "--stability-percentage"}; - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), "", PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --max-trials") - { - expected_msg = - CreateUsageMessage("--max-trials (-r)", "The value must be > 0."); - CHECK_INT_OPTION("--max-trials", exp->max_trials, expected_msg); - - SUBCASE("set to 0") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--max-trials", "0"}; - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --request-count") - { - SUBCASE("valid value") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--request-count", "500"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->request_count = 500; - exp->measurement_mode = MeasurementMode::COUNT_WINDOWS; - exp->measurement_request_count = 500; - } - SUBCASE("negative value") - { - int argc = 5; - char* argv[argc] = {app_name, "-m", model_name, "--request-count", "-2"}; - - expected_msg = - CreateUsageMessage("--request-count", "The value must be > 0."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - check_params = false; - } - SUBCASE("less than request rate") - { - int argc = 7; - char* argv[argc] = {app_name, 
"-m", - model_name, "--request-count", - "2", "--request-rate-range", - "5"}; - - expected_msg = "request-count can not be less than request-rate"; - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - check_params = false; - } - SUBCASE("less than concurrency") - { - int argc = 7; - char* argv[argc] = {app_name, "-m", - model_name, "--request-count", - "2", "--concurrency-range", - "5"}; - - expected_msg = "request-count can not be less than concurrency"; - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - check_params = false; - } - SUBCASE("multiple request rate") - { - int argc = 7; - char* argv[argc] = {app_name, "-m", - model_name, "--request-count", - "20", "--request-rate-range", - "5:6:1"}; - - expected_msg = - "request-count not supported with multiple request-rate values in " - "one run"; - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - check_params = false; - } - SUBCASE("multiple concurrency") - { - int argc = 7; - char* argv[argc] = {app_name, "-m", - model_name, "--request-count", - "20", "--concurrency-range", - "5:6:1"}; - - expected_msg = - "request-count not supported with multiple concurrency values in " - "one run"; - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - check_params = false; - } - - SUBCASE("mode and count are overwritten with non-zero request-count") - { - int argc = 9; - char* argv[argc] = { - app_name, - "-m", - model_name, - "--request-count", - "2000", - "--measurement-mode", - "time_windows", - "measurement-request-count", - "30"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->request_count = 2000; - exp->measurement_mode = MeasurementMode::COUNT_WINDOWS; - exp->measurement_request_count = 2000; - } - SUBCASE("zero value (no override to measurement mode)") - { - int argc = 7; - char* argv[argc] = {app_name, "-m", model_name, - "--request-count", "0", "--measurement-mode", - "time_windows"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->request_count = 0; - exp->measurement_mode = MeasurementMode::TIME_WINDOWS; - } - SUBCASE("zero value (no override to measurement request count)") - { - int argc = 9; - char* argv[argc] = { - app_name, - "-m", - model_name, - "--request-count", - "0", - "--measurement-mode", - "count_windows", - "--measurement-request-count", - "50"}; - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); - - exp->request_count = 0; - exp->measurement_mode = MeasurementMode::COUNT_WINDOWS; - exp->measurement_request_count = 50; - } - } - - SUBCASE("Option : --collect-metrics") - { - SUBCASE("with --service-kind != triton") - { - int argc = 8; - char* argv[argc] = { - app_name, "-m", model_name, "--collect-metrics", - "--service-kind", "tfserving", "-i", "grpc"}; - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), - "Server-side metric collection is only supported with Triton client " - "backend.", - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --metrics-url") - { - // missing --collect-metrics - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--metrics-url", "localhost:8002/metrics"}; - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), - "Must specify --collect-metrics when using the --metrics-url option.", - 
PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("Option : --metrics-interval") - { - SUBCASE("missing --collect-metrics") - { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--metrics-interval", "1000"}; - - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), - "Must specify --collect-metrics when using the --metrics-interval " - "option.", - PerfAnalyzerException); - - check_params = false; - } - - SUBCASE("metrics interval 0") - { - int argc = 6; - char* argv[argc] = { - app_name, "-m", model_name, "--collect-metrics", "--metrics-interval", - "0"}; - - expected_msg = CreateUsageMessage( - "--metrics-interval", "The value must be > 0 msecs."); - CHECK_THROWS_WITH_AS( - act = parser.Parse(argc, argv), expected_msg.c_str(), - PerfAnalyzerException); - - check_params = false; - } - } - - SUBCASE("Option : --bls-composing-models") - { - int argc = 5; - - SUBCASE("one model") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", "a"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(act->bls_composing_models.size() == 1); - CHECK_STRING(act->bls_composing_models[0].first, "a"); - CHECK_STRING(act->bls_composing_models[0].second, ""); - } - SUBCASE("lists with no version") - { - SUBCASE("a,b,c") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", "a,b,c"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - SUBCASE("a, b, c") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", "a, b, c"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - SUBCASE("a,b, c") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", "a,b, c"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - SUBCASE("a, b,c") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", "a, b,c"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - SUBCASE("a, b, c") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", "a, b, c"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - - CHECK(!parser.UsageCalled()); - REQUIRE(act->bls_composing_models.size() == 3); - CHECK_STRING(act->bls_composing_models[0].first, "a"); - CHECK_STRING(act->bls_composing_models[1].first, "b"); - CHECK_STRING(act->bls_composing_models[2].first, "c"); - CHECK_STRING(act->bls_composing_models[0].second, ""); - CHECK_STRING(act->bls_composing_models[1].second, ""); - CHECK_STRING(act->bls_composing_models[2].second, ""); - } - SUBCASE("list with version") - { - SUBCASE("a:1,b:2,c:1") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", - "a:1,b:2,c:1"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - SUBCASE("a:1, b:2, c:1") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", - "a:1, b:2, c:1"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - SUBCASE("a:1, b:2, c:1") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", - "a:1, b:2, c:1"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - SUBCASE("a:1 , b:2, c:1") - { - char* argv[argc] = { - app_name, "-m", model_name, "--bls-composing-models", - "a:1 , b:2, c:1"}; - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - } - CHECK(!parser.UsageCalled()); - REQUIRE(act->bls_composing_models.size() == 3); - CHECK_STRING(act->bls_composing_models[0].first, "a"); - CHECK_STRING(act->bls_composing_models[1].first, "b"); - 
-      CHECK_STRING(act->bls_composing_models[2].first, "c");
-      CHECK_STRING(act->bls_composing_models[0].second, "1");
-      CHECK_STRING(act->bls_composing_models[1].second, "2");
-      CHECK_STRING(act->bls_composing_models[2].second, "1");
-    }
-    SUBCASE("list with some versions")
-    {
-      SUBCASE("a,b:3,c")
-      {
-        char* argv[argc] = {
-            app_name, "-m", model_name, "--bls-composing-models", "a,b:3,c"};
-        REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
-      }
-      CHECK(!parser.UsageCalled());
-      REQUIRE(act->bls_composing_models.size() == 3);
-      CHECK_STRING(act->bls_composing_models[0].first, "a");
-      CHECK_STRING(act->bls_composing_models[1].first, "b");
-      CHECK_STRING(act->bls_composing_models[2].first, "c");
-      CHECK_STRING(act->bls_composing_models[0].second, "");
-      CHECK_STRING(act->bls_composing_models[1].second, "3");
-      CHECK_STRING(act->bls_composing_models[2].second, "");
-    }
-    SUBCASE("multiple versions of the same model")
-    {
-      SUBCASE("a:1,b:2,a:2")
-      {
-        char* argv[argc] = {
-            app_name, "-m", model_name, "--bls-composing-models", "a:1,b,a:2"};
-        REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
-      }
-      CHECK(!parser.UsageCalled());
-      REQUIRE(act->bls_composing_models.size() == 3);
-      CHECK_STRING(act->bls_composing_models[0].first, "a");
-      CHECK_STRING(act->bls_composing_models[1].first, "b");
-      CHECK_STRING(act->bls_composing_models[2].first, "a");
-      CHECK_STRING(act->bls_composing_models[0].second, "1");
-      CHECK_STRING(act->bls_composing_models[1].second, "");
-      CHECK_STRING(act->bls_composing_models[2].second, "2");
-    }
-  }
-
-  if (check_params) {
-    if (act == nullptr) {
-      std::cerr
-          << "Error: Attempting to access `act` but was not initialized. Check "
-             "if the test cases are missing `check_params = false` statement."
-          << std::endl;
-      exit(1);
-    }
-    CHECK_PARAMS(act, exp);
-  }
-  optind = 1;  // Reset GotOpt index, needed to parse the next command line
-}
-}}  // namespace triton::perfanalyzer
diff --git a/src/c++/perf_analyzer/test_concurrency_manager.cc b/src/c++/perf_analyzer/test_concurrency_manager.cc
deleted file mode 100644
index 1941a018e..000000000
--- a/src/c++/perf_analyzer/test_concurrency_manager.cc
+++ /dev/null
@@ -1,941 +0,0 @@
-// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - -#include "command_line_parser.h" -#include "concurrency_manager.h" -#include "doctest.h" -#include "mock_client_backend.h" -#include "mock_concurrency_worker.h" -#include "mock_data_loader.h" -#include "mock_infer_data_manager.h" -#include "mock_model_parser.h" -#include "mock_sequence_manager.h" -#include "sequence_manager.h" -#include "test_load_manager_base.h" -#include "test_utils.h" - -namespace triton { namespace perfanalyzer { - -class TestConcurrencyManager : public TestLoadManagerBase, - public ConcurrencyManager { - public: - TestConcurrencyManager( - PerfAnalyzerParameters params, bool is_sequence_model = false, - bool is_decoupled_model = false, bool use_mock_infer = false) - : use_mock_infer_(use_mock_infer), - TestLoadManagerBase(params, is_sequence_model, is_decoupled_model), - ConcurrencyManager( - params.async, params.streaming, params.batch_size, - params.max_threads, params.max_concurrency, - params.shared_memory_type, params.output_shm_size, GetParser(), - GetFactory(), params.request_parameters) - { - } - - std::shared_ptr MakeWorker( - std::shared_ptr thread_stat, - std::shared_ptr thread_config) override - { - size_t id = workers_.size(); - - auto worker = std::make_shared( - id, thread_stat, thread_config, parser_, data_loader_, factory_, - on_sequence_model_, async_, max_concurrency_, using_json_data_, - streaming_, batch_size_, wake_signal_, wake_mutex_, active_threads_, - execute_, infer_data_manager_, sequence_manager_); - - if (use_mock_infer_) { - EXPECT_CALL(*worker, Infer()) - .WillRepeatedly(testing::Invoke( - worker.get(), &MockConcurrencyWorker::EmptyInfer)); - } - return worker; - } - - - void TestReconfigThreads( - const size_t concurrent_request_count, const size_t num_requests, - std::vector& expected_configs) - { - ConcurrencyManager::ReconfigThreads(concurrent_request_count, num_requests); - - auto expected_size = expected_configs.size(); - - // Check that the correct number of threads are created - // - CHECK(threads_.size() == expected_size); - - // Check that threads_config has correct concurrency and seq stat index - // offset - for (auto i = 0; i < expected_configs.size(); i++) { - CHECK( - threads_config_[i]->concurrency_ == expected_configs[i].concurrency_); - CHECK( - threads_config_[i]->seq_stat_index_offset_ == - expected_configs[i].seq_stat_index_offset_); - CHECK( - threads_config_[i]->num_requests_ == - expected_configs[i].num_requests_); - } - } - - void StopWorkerThreads() { LoadManager::StopWorkerThreads(); } - - /// Test that the correct Infer function is called in the backend - /// - void TestInferType() - { - // FIXME TMA-982: This delay is to avoid deadlock. Investigate why delay is - // needed. 
- stats_->SetDelays({50}); - - ChangeConcurrencyLevel(params_.max_concurrency); - - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - - CheckInferType(); - } - - /// Test that the correct concurrency is maintained in the load manager - /// - void TestConcurrency( - size_t response_delay, std::chrono::milliseconds sleep_time) - { - stats_->SetDelays({response_delay}); - - ChangeConcurrencyLevel(params_.max_concurrency); - std::this_thread::sleep_for(sleep_time); - - CheckConcurrency(); - } - - /// Test sequence handling - /// - void TestSequences() - { - size_t delay_ms = 10; - stats_->SetDelays({delay_ms}); - - auto stats = cb::InferStat(); - double concurrency1 = params_.max_concurrency / 2; - double concurrency2 = params_.max_concurrency; - int sleep_ms = 500; - - auto sleep_time = std::chrono::milliseconds(sleep_ms); - size_t expected_count1 = sleep_ms * concurrency1 / delay_ms; - size_t expected_count2 = - sleep_ms * concurrency2 / delay_ms + expected_count1; - - // Run and check request rate 1 - // - ChangeConcurrencyLevel(concurrency1); - std::this_thread::sleep_for(sleep_time); - - stats = cb::InferStat(); - GetAccumulatedClientStat(&stats); - CHECK( - stats.completed_request_count == - doctest::Approx(expected_count1).epsilon(0.10)); - - PauseSequenceWorkers(); - CheckSequences(concurrency1); - - // Make sure that the client and the manager are in agreement on the request - // count in between rates - // - stats = cb::InferStat(); - GetAccumulatedClientStat(&stats); - int client_total_requests = stats_->num_async_infer_calls + - stats_->num_async_stream_infer_calls + - stats_->num_infer_calls; - CHECK(stats.completed_request_count == client_total_requests); - - ResetStats(); - - // Run and check request rate 2 - // - ChangeConcurrencyLevel(concurrency2); - std::this_thread::sleep_for(sleep_time); - - stats = cb::InferStat(); - GetAccumulatedClientStat(&stats); - CHECK( - stats.completed_request_count == - doctest::Approx(expected_count2).epsilon(0.10)); - - // Stop all threads and make sure everything is as expected - // - StopWorkerThreads(); - - CheckSequences(concurrency2); - } - - /// Test that tries to find deadlocks and livelocks - /// - void TestTimeouts() - { - TestWatchDog watchdog(1000); - ChangeConcurrencyLevel(params_.max_concurrency); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - StopWorkerThreads(); - watchdog.stop(); - } - - /// Test that idle time is tracked correctly - void TestOverhead() - { - stats_->SetDelays({1}); - ChangeConcurrencyLevel(params_.max_concurrency); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - // During a run of 100 ms (100,000,000 ns), make sure that the idle time is - // at least 95% of that - // - auto idle_time_ns = GetIdleTime(); - CHECK(idle_time_ns > 95000000); - StopWorkerThreads(); - } - - std::shared_ptr& parser_{LoadManager::parser_}; - std::shared_ptr& data_loader_{LoadManager::data_loader_}; - std::shared_ptr& sequence_manager_{ - LoadManager::sequence_manager_}; - bool& using_json_data_{LoadManager::using_json_data_}; - bool& execute_{ConcurrencyManager::execute_}; - size_t& batch_size_{LoadManager::batch_size_}; - size_t& max_threads_{LoadManager::max_threads_}; - std::shared_ptr factory_{ - TestLoadManagerBase::factory_}; - std::shared_ptr& infer_data_manager_{ - LoadManager::infer_data_manager_}; - - private: - bool use_mock_infer_{false}; - - void CheckConcurrency() - { - if (params_.max_concurrency < 4) { - CHECK(stats_->num_active_infer_calls == params_.max_concurrency); - } 
else { - CHECK( - stats_->num_active_infer_calls == - doctest::Approx(params_.max_concurrency).epsilon(0.25)); - } - } - - - std::shared_ptr MakeSequenceManager( - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation, const bool using_json_data, - std::shared_ptr data_loader) override - { - return std::make_shared( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - } -}; - -/// Test that the correct Infer function is called in the backend -/// -TEST_CASE("concurrency_infer_type") -{ - PerfAnalyzerParameters params{}; - - params.max_concurrency = 1; - - SUBCASE("async_streaming") - { - params.async = true; - params.streaming = true; - } - SUBCASE("async_no_streaming") - { - params.async = true; - params.streaming = false; - } - SUBCASE("no_async_streaming") - { - params.async = false; - params.streaming = true; - } - SUBCASE("no_async_no_streaming") - { - params.async = false; - params.streaming = false; - } - - - TestConcurrencyManager tcm(params); - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - tcm.TestInferType(); -} - -/// Test that the correct concurrency is maintained in the load manager -/// -TEST_CASE("concurrency_concurrency") -{ - PerfAnalyzerParameters params{}; - size_t response_delay{50}; - std::chrono::milliseconds sleep_time{225}; - - SUBCASE("sync, no-streaming, 1 concurrency, 1 thread") - { - params.forced_sync = true; - params.async = false; - params.streaming = false; - params.max_concurrency = 1; - params.max_threads = 1; - } - - SUBCASE("sync, no-streaming, 4 concurrency, 4 threads") - { - params.forced_sync = true; - params.async = false; - params.streaming = false; - params.max_concurrency = 4; - params.max_threads = 4; - } - - SUBCASE("async, no-streaming, 1 concurrency, 1 thread") - { - params.forced_sync = false; - params.async = true; - params.streaming = false; - params.max_concurrency = 1; - params.max_threads = 1; - } - - SUBCASE("async, no-streaming, 4 concurrency, 1 thread") - { - params.forced_sync = false; - params.async = true; - params.streaming = false; - params.max_concurrency = 4; - params.max_threads = 1; - } - - SUBCASE("async, no-streaming, 4 concurrency, 2 threads") - { - params.forced_sync = false; - params.async = true; - params.streaming = false; - params.max_concurrency = 4; - params.max_threads = 2; - } - - SUBCASE("async, no-streaming, 4 concurrency, 4 threads") - { - params.forced_sync = false; - params.async = true; - params.streaming = false; - params.max_concurrency = 4; - params.max_threads = 4; - } - - SUBCASE("async, streaming, 1 concurrency, 1 thread") - { - params.forced_sync = false; - params.async = true; - params.streaming = true; - params.max_concurrency = 1; - params.max_threads = 1; - } - - SUBCASE("async, streaming, 4 concurrency, 1 thread") - { - params.forced_sync = false; - params.async = true; - params.streaming = true; - params.max_concurrency = 4; - params.max_threads = 1; - } - - SUBCASE("async, streaming, 4 concurrency, 2 threads") - { - params.forced_sync = false; - params.async = true; - params.streaming = true; - params.max_concurrency = 4; - params.max_threads = 2; - } - - SUBCASE("async, 
streaming, 4 concurrency, 4 threads") - { - params.forced_sync = false; - params.async = true; - params.streaming = true; - params.max_concurrency = 4; - params.max_threads = 4; - } - - - TestConcurrencyManager tcm(params); - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - tcm.TestConcurrency(response_delay, sleep_time); -} - -/// Check that the inference requests for sequences follow all rules and -/// parameters -/// -TEST_CASE("concurrency_sequence") -{ - PerfAnalyzerParameters params = TestLoadManagerBase::GetSequenceTestParams(); - const bool is_sequence_model{true}; - - TestConcurrencyManager tcm(params, is_sequence_model); - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - tcm.TestSequences(); -} - -/// Create the case where the sequences do NOT go round robin due to -/// the first request taking longer than the rest. -/// -/// This exposed a bug where we were constantly resetting ctx IDs -/// and issuing over and over again to the first sequence even though -/// it was the only sequence that should NOT be issued because it was -/// still outstanding -/// -TEST_CASE("concurrency_free_ctx_ids") -{ - PerfAnalyzerParameters params{}; - params.async = true; - params.streaming = true; - params.max_concurrency = 6; - - bool is_sequence_model{true}; - - - TestConcurrencyManager tcm(params, is_sequence_model); - - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - // Have the first request (sequence ID 1) take very long, and all the other - // requests are fast - // - tcm.stats_->SetDelays({50, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}); - - std::shared_ptr thread_stat{std::make_shared()}; - std::shared_ptr thread_config{ - std::make_shared(0)}; - thread_config->concurrency_ = 4; - - std::shared_ptr worker{tcm.MakeWorker(thread_stat, thread_config)}; - - std::future infer_future{std::async(&IWorker::Infer, worker)}; - - std::this_thread::sleep_for(std::chrono::milliseconds(15)); - - early_exit = true; - infer_future.get(); - - // The first sequence should only be called two times, once at the very start, - // and once during shutdown - // - CHECK(tcm.stats_->sequence_status.seq_ids_to_count.at(1) == 2); -} - -TEST_CASE("Concurrency - shared memory infer input calls") -{ - PerfAnalyzerParameters params{}; - params.max_concurrency = 4; - bool is_sequence_model{false}; - - const auto& ParameterizeAsyncAndStreaming{[&]() { - SUBCASE("sync non-streaming") - { - params.async = false; - params.streaming = false; - } - SUBCASE("async non-streaming") - { - params.async = true; - params.streaming = false; - } - SUBCASE("async streaming") - { - params.async = true; - params.streaming = true; - } - }}; - - const auto& ParameterizeSequence{[&]() { - SUBCASE("non-sequence") - { - is_sequence_model = false; - ParameterizeAsyncAndStreaming(); - } - SUBCASE("sequence") - { - is_sequence_model = true; - params.num_of_sequences = 1; - ParameterizeAsyncAndStreaming(); - } - }}; - - const auto& 
ParameterizeMemory{[&]() { - SUBCASE("No shared memory") - { - params.shared_memory_type = NO_SHARED_MEMORY; - ParameterizeSequence(); - } - SUBCASE("system shared memory") - { - params.shared_memory_type = SYSTEM_SHARED_MEMORY; - ParameterizeSequence(); - } - SUBCASE("cuda shared memory") - { - params.shared_memory_type = CUDA_SHARED_MEMORY; - ParameterizeSequence(); - } - }}; - - ParameterizeMemory(); - - - const std::string json_str{R"( - { - "data": [ - { - "INPUT0": [2000000000] - }, - { - "INPUT0": [2000000001] - } - ] - } - )"}; - - MockInputPipeline mip = - TestLoadManagerBase::ProcessCustomJsonData(json_str, is_sequence_model); - - - TestConcurrencyManager tcm(params, is_sequence_model); - - tcm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); - - std::shared_ptr thread_stat{std::make_shared()}; - std::shared_ptr thread_config{ - std::make_shared(0)}; - thread_config->concurrency_ = 1; - - tcm.parser_ = mip.mock_model_parser_; - tcm.data_loader_ = mip.mock_data_loader_; - tcm.using_json_data_ = true; - tcm.execute_ = true; - tcm.batch_size_ = 1; - tcm.max_threads_ = 1; - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - std::shared_ptr worker{tcm.MakeWorker(thread_stat, thread_config)}; - std::future infer_future{std::async(&IWorker::Infer, worker)}; - - std::this_thread::sleep_for(std::chrono::milliseconds(18)); - - early_exit = true; - infer_future.get(); - - const auto& actual_append_raw_calls{tcm.stats_->num_append_raw_calls}; - const auto& actual_set_shared_memory_calls{ - tcm.stats_->num_set_shared_memory_calls}; - - if (params.shared_memory_type == NO_SHARED_MEMORY) { - CHECK(actual_append_raw_calls > 0); - CHECK(actual_set_shared_memory_calls == 0); - } else { - CHECK(actual_append_raw_calls == 0); - CHECK(actual_set_shared_memory_calls > 0); - } -} - -/// Verify Shared Memory api calls -/// -TEST_CASE("Concurrency - Shared memory methods") -{ - PerfAnalyzerParameters params; - bool is_sequence = false; - bool is_decoupled = false; - bool use_mock_infer = true; - - const std::string json_str{R"( - { - "data": [ - { - "INPUT0": [2123456789] - } - ] - } - )"}; - - MockInputPipeline mip = TestLoadManagerBase::ProcessCustomJsonData(json_str); - - cb::MockClientStats::SharedMemoryStats expected_stats; - - SUBCASE("System shared memory usage") - { - params.shared_memory_type = SYSTEM_SHARED_MEMORY; - TestConcurrencyManager tcm( - params, is_sequence, is_decoupled, use_mock_infer); - - tcm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - expected_stats.num_unregister_all_shared_memory_calls = 1; - expected_stats.num_register_system_shared_memory_calls = 1; - 
expected_stats.num_create_shared_memory_region_calls = 1; - expected_stats.num_map_shared_memory_calls = 1; - tcm.CheckSharedMemory(expected_stats); - } - - SUBCASE("Cuda shared memory usage") - { - params.shared_memory_type = CUDA_SHARED_MEMORY; - TestConcurrencyManager tcm( - params, is_sequence, is_decoupled, use_mock_infer); - - tcm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - expected_stats.num_unregister_all_shared_memory_calls = 1; - expected_stats.num_register_cuda_shared_memory_calls = 1; - tcm.CheckSharedMemory(expected_stats); - } - - SUBCASE("No shared memory usage") - { - params.shared_memory_type = NO_SHARED_MEMORY; - TestConcurrencyManager tcm( - params, is_sequence, is_decoupled, use_mock_infer); - tcm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - tcm.CheckSharedMemory(expected_stats); - } -} - -TEST_CASE("concurrency_deadlock") -{ - PerfAnalyzerParameters params{}; - params.max_concurrency = 6; - bool is_sequence_model{true}; - bool some_infer_failures{false}; - - const auto& ParameterizeSyncStreaming{[&]() { - SUBCASE("sync") - { - params.async = false; - params.streaming = false; - } - SUBCASE("aync no streaming") - { - params.async = true; - params.streaming = false; - } - SUBCASE("async streaming") - { - params.async = true; - params.streaming = true; - } - }}; - - const auto& ParameterizeConcurrency{[&]() { - SUBCASE("10 concurrency, 10 thread") - { - ParameterizeSyncStreaming(); - params.max_concurrency = 10; - params.max_threads = 10; - } - SUBCASE("10 concurrency, 4 thread") - { - ParameterizeSyncStreaming(); - params.max_concurrency = 10; - params.max_threads = 4; - } - }}; - - const auto& ParameterizeSequence{[&]() { - SUBCASE("non-sequence") - { - ParameterizeConcurrency(); - is_sequence_model = false; - } - SUBCASE("sequence") - { - ParameterizeConcurrency(); - is_sequence_model = true; - } - }}; - - const auto& ParameterizeFailures{[&]() { - SUBCASE("yes_failures") - { - some_infer_failures = true; - ParameterizeSequence(); - } - SUBCASE("no_failures") - { - some_infer_failures = false; - ParameterizeSequence(); - } - }}; - - std::vector delays; - - const auto& ParameterizeDelays{[&]() { - SUBCASE("no_delay") - { - delays = {0}; - ParameterizeFailures(); - } - SUBCASE("random_delay") - { - delays = {1, 5, 20, 4, 3}; - ParameterizeFailures(); - } - }}; - - - ParameterizeDelays(); - - - TestConcurrencyManager tcm(params, is_sequence_model); - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, 
params.sequence_length_specified, - params.sequence_length_variation); - - tcm.stats_->SetDelays(delays); - - // Sometimes have a request fail - if (some_infer_failures) { - tcm.stats_->SetReturnStatuses({true, true, true, false}); - } - - tcm.TestTimeouts(); -} - -TEST_CASE("concurrency_overhead") -{ - PerfAnalyzerParameters params{}; - SUBCASE("sync, conc 1") - { - params.async = false; - params.max_concurrency = 1; - } - SUBCASE("sync, conc 4") - { - params.async = false; - params.max_concurrency = 4; - } - SUBCASE("async, conc 1") - { - params.async = true; - params.max_concurrency = 1; - } - SUBCASE("async, conc 1") - { - params.async = true; - params.max_concurrency = 4; - } - TestConcurrencyManager tcm(params, false); - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - tcm.TestOverhead(); -} - -TEST_CASE( - "send_request_rate_concurrency_manager: testing logic around detecting " - "send request count") -{ - PerfAnalyzerParameters params{}; - - SUBCASE("sync") - { - params.async = false; - } - SUBCASE("async") - { - params.async = true; - } - - TestConcurrencyManager tcm(params); - - tcm.stats_->SetDelays({10}); - - tcm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - tcm.ChangeConcurrencyLevel(4); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - tcm.StopWorkerThreads(); - - const size_t num_sent_requests{tcm.GetAndResetNumSentRequests()}; - - CHECK(num_sent_requests == doctest::Approx(40).epsilon(0.1)); -} - -TEST_CASE( - "reconfigure_threads" * - doctest::description( - "This test confirms the side-effects of ReconfigThreads(). 
Namely, " - "that the correct number of threads are created and that they are " - "configured properly")) -{ - PerfAnalyzerParameters params{}; - std::vector expected_config_values; - std::vector expected_concurrencies; - std::vector expected_seq_stat_index_offsets; - std::vector expected_num_requests; - - size_t target_concurrency = 0; - size_t target_num_requests = 0; - - SUBCASE("normal") - { - params.max_threads = 10; - target_concurrency = 5; - target_num_requests = 15; - - expected_concurrencies = {1, 1, 1, 1, 1}; - expected_seq_stat_index_offsets = {0, 1, 2, 3, 4}; - expected_num_requests = {3, 3, 3, 3, 3}; - } - SUBCASE("thread_limited") - { - params.max_threads = 5; - target_concurrency = 10; - target_num_requests = 20; - - expected_concurrencies = {2, 2, 2, 2, 2}; - expected_seq_stat_index_offsets = {0, 2, 4, 6, 8}; - expected_num_requests = {4, 4, 4, 4, 4}; - } - SUBCASE("unbalanced") - { - params.max_threads = 6; - target_concurrency = 14; - target_num_requests = 15; - - expected_concurrencies = {3, 3, 2, 2, 2, 2}; - expected_seq_stat_index_offsets = {0, 3, 6, 8, 10, 12}; - expected_num_requests = {3, 3, 3, 2, 2, 2}; - } - SUBCASE("no requests specified") - { - params.max_threads = 2; - target_concurrency = 14; - target_num_requests = 0; - - expected_concurrencies = {7, 7}; - expected_seq_stat_index_offsets = {0, 7}; - expected_num_requests = {0, 0}; - } - - for (auto i = 0; i < expected_concurrencies.size(); i++) { - ThreadConfig tc(i); - tc.concurrency_ = expected_concurrencies[i]; - tc.seq_stat_index_offset_ = expected_seq_stat_index_offsets[i]; - tc.num_requests_ = expected_num_requests[i]; - expected_config_values.push_back(tc); - } - - TestConcurrencyManager tcm(params); - tcm.TestReconfigThreads( - target_concurrency, target_num_requests, expected_config_values); -} - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_ctx_id_tracker.cc b/src/c++/perf_analyzer/test_ctx_id_tracker.cc deleted file mode 100644 index 8625fbd6d..000000000 --- a/src/c++/perf_analyzer/test_ctx_id_tracker.cc +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include -#include - -#include "concurrency_ctx_id_tracker.h" -#include "doctest.h" -#include "fifo_ctx_id_tracker.h" -#include "rand_ctx_id_tracker.h" - -namespace triton { namespace perfanalyzer { - -TEST_CASE("CtxIdTrackers: FIFO") -{ - std::shared_ptr tracker = std::make_shared(); - - // Reset will load up context IDs 0-9 into the queue and return them in order - // on consecutive Get calls - size_t count = 10; - CHECK_FALSE(tracker->IsAvailable()); - tracker->Reset(count); - CHECK(tracker->IsAvailable()); - for (size_t i = 0; i < count; i++) { - CHECK(tracker->Get() == i); - } - - // Manually restoring values should be returned in-order - CHECK_FALSE(tracker->IsAvailable()); - tracker->Restore(7); - CHECK(tracker->IsAvailable()); - tracker->Restore(13); - CHECK(tracker->Get() == 7); - CHECK(tracker->Get() == 13); - - // A reset should throw away any values on the old list - tracker->Reset(10); - tracker->Reset(1); - tracker->Get(); - CHECK(!tracker->IsAvailable()); - - // Calling Get when not available should Throw - CHECK_THROWS_AS(tracker->Get(), const std::exception&); -} - -TEST_CASE("CtxIdTrackers: Conc") -{ - std::shared_ptr tracker = - std::make_shared(); - - // Reset will load up 10 instances of context IDs 0 into the queue and return - // them in order on consecutive Get calls - size_t count = 10; - tracker->Reset(count); - for (size_t i = 0; i < count; i++) { - CHECK(tracker->Get() == 0); - } - - // Manually restoring values should be returned in-order - CHECK_FALSE(tracker->IsAvailable()); - tracker->Restore(7); - tracker->Restore(13); - CHECK(tracker->IsAvailable()); - CHECK(tracker->Get() == 7); - CHECK(tracker->Get() == 13); - - // A reset should throw away any values on the old list - tracker->Reset(10); - tracker->Reset(1); - tracker->Get(); - CHECK(!tracker->IsAvailable()); - - // Calling Get when not available should Throw - CHECK_THROWS_AS(tracker->Get(), const std::exception&); -} - -TEST_CASE("CtxIdTrackers: Rand") -{ - std::shared_ptr tracker = std::make_shared(); - size_t max; - - auto check_range_and_variance = [&]() { - size_t num_trials = 1000; - - std::vector results(max, 0); - for (size_t i = 0; i < num_trials; i++) { - auto x = tracker->Get(); - REQUIRE((x < max && x >= 0)); - results[x]++; - } - - // Confirm that the distribution of the picked CTX IDs is random - double mean = - std::accumulate(results.begin(), results.end(), 0.0) / results.size(); - double variance = 0; - for (size_t i = 0; i < results.size(); i++) { - variance += std::pow(results[i] - mean, 2); - } - variance /= results.size(); - CHECK((variance > 10 && variance < 100)); - }; - - // IsAvailable is always true for this class - CHECK(tracker->IsAvailable()); - - // Reset should define the bounds of random CTX id picking - max = 10; - tracker->Reset(max); - // Restore should have no impact on this class. 
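// Editor's note: illustrative sketch only, not part of the original test file.
// It mirrors the FIFO context-ID tracking behavior exercised by the
// "CtxIdTrackers: FIFO" / "CtxIdTrackers: Conc" cases above (Reset loads IDs,
// Get hands them out in order, Restore returns one, Get on empty throws).
// The class name FifoCtxIdSketch is hypothetical; the real code lives in
// fifo_ctx_id_tracker.h.
#include <cstddef>
#include <queue>
#include <stdexcept>

class FifoCtxIdSketch {
 public:
  // Load context IDs 0..count-1, discarding anything previously queued.
  void Reset(size_t count)
  {
    free_ids_ = {};
    for (size_t i = 0; i < count; i++) {
      free_ids_.push(i);
    }
  }
  // Return a released ID so it can be handed out again, in FIFO order.
  void Restore(size_t id) { free_ids_.push(id); }
  bool IsAvailable() const { return !free_ids_.empty(); }
  // Hand out the oldest free ID; throws when none are available, which is
  // what the CHECK_THROWS_AS expectations above rely on.
  size_t Get()
  {
    if (free_ids_.empty()) {
      throw std::runtime_error("no free context IDs");
    }
    size_t id = free_ids_.front();
    free_ids_.pop();
    return id;
  }

 private:
  std::queue<size_t> free_ids_;
};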
- tracker->Restore(9999); - check_range_and_variance(); - - - // Reset should RE-define the bounds of random CTX id picking - max = 5; - tracker->Reset(max); - check_range_and_variance(); -} - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_custom_load_manager.cc b/src/c++/perf_analyzer/test_custom_load_manager.cc deleted file mode 100644 index ced79af7d..000000000 --- a/src/c++/perf_analyzer/test_custom_load_manager.cc +++ /dev/null @@ -1,431 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
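// Editor's note: illustrative sketch only, not part of the original file. The
// CustomLoadManager tests that follow verify that request timestamps are the
// cumulative sums of a user-supplied interval list, cycling through that list.
// The helper name ExpectedSchedule is hypothetical; it restates the
// expected-schedule computation used inside TestSchedule below.
#include <chrono>
#include <cstddef>
#include <vector>

std::vector<std::chrono::nanoseconds> ExpectedSchedule(
    const std::vector<std::chrono::nanoseconds>& intervals,
    size_t num_requests)
{
  std::vector<std::chrono::nanoseconds> timestamps;
  std::chrono::nanoseconds current{0};
  for (size_t i = 0; i < num_requests; i++) {
    // Each timestamp advances by the next interval, wrapping around the list.
    current += intervals[i % intervals.size()];
    timestamps.push_back(current);
  }
  return timestamps;
}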
- -#include -#include -#include -#include - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "custom_load_manager.h" -#include "doctest.h" -#include "mock_request_rate_worker.h" -#include "request_rate_manager.h" -#include "test_load_manager_base.h" - -using nanoseconds = std::chrono::nanoseconds; -using milliseconds = std::chrono::milliseconds; - -namespace triton { namespace perfanalyzer { - -/// Class to test the CustomLoadManager -/// -class TestCustomLoadManager : public TestLoadManagerBase, - public CustomLoadManager { - public: - TestCustomLoadManager() = default; - - TestCustomLoadManager( - PerfAnalyzerParameters params, bool is_sequence_model = false, - bool is_decoupled_model = false, bool use_mock_infer = false) - : use_mock_infer_(use_mock_infer), - TestLoadManagerBase(params, is_sequence_model, is_decoupled_model), - CustomLoadManager( - params.async, params.streaming, "INTERVALS_FILE", params.batch_size, - params.measurement_window_ms, params.max_trials, params.max_threads, - params.num_of_sequences, params.shared_memory_type, - params.output_shm_size, params.serial_sequences, GetParser(), - GetFactory(), params.request_parameters) - { - InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - } - - std::shared_ptr MakeWorker( - std::shared_ptr thread_stat, - std::shared_ptr thread_config) override - { - size_t id = workers_.size(); - auto worker = std::make_shared( - id, thread_stat, thread_config, parser_, data_loader_, factory_, - on_sequence_model_, async_, max_threads_, using_json_data_, streaming_, - batch_size_, wake_signal_, wake_mutex_, execute_, start_time_, - serial_sequences_, infer_data_manager_, sequence_manager_); - - if (use_mock_infer_) { - EXPECT_CALL(*worker, Infer()) - .WillRepeatedly(testing::Invoke( - worker.get(), &MockRequestRateWorker::EmptyInfer)); - } - return worker; - } - - void TestSchedule( - std::vector intervals, PerfAnalyzerParameters params) - { - for (auto i : intervals) { - custom_intervals_.push_back(nanoseconds{i}); - } - nanoseconds measurement_window_nanoseconds{ - params.measurement_window_ms * NANOS_PER_MILLIS}; - nanoseconds max_test_duration{ - measurement_window_nanoseconds * params.max_trials}; - nanoseconds expected_current_timestamp{0}; - size_t intervals_index = 0; - - PauseWorkers(); - ConfigureThreads(); - GenerateSchedule(); - - std::vector expected_timestamps; - std::vector observed_timestamps; - - // Determine what the observed schedule was by getting each worker's - // schedule and then sorting them together - // - for (auto worker : workers_) { - nanoseconds observed_timestamp = - std::dynamic_pointer_cast(worker) - ->GetNextTimestamp(); - while (observed_timestamp <= max_test_duration) { - observed_timestamps.push_back(observed_timestamp); - observed_timestamp = - std::dynamic_pointer_cast(worker) - ->GetNextTimestamp(); - } - } - sort(observed_timestamps.begin(), observed_timestamps.end()); - - // Determine what the schedule "should" be - // - while (expected_current_timestamp < observed_timestamps.back()) { - expected_current_timestamp += custom_intervals_[intervals_index]; - expected_timestamps.push_back(expected_current_timestamp); - intervals_index = (intervals_index + 1) % custom_intervals_.size(); - } - - // Confirm that the expected and observed schedules were the same - // - REQUIRE_MESSAGE( - 
observed_timestamps.size() == expected_timestamps.size(), - "Mismatch in size of schedules"); - - for (size_t i = 0; i < observed_timestamps.size(); i++) { - CHECK(observed_timestamps[i] == expected_timestamps[i]); - } - } - - void TestSequences( - std::vector intervals, bool check_sequences_balanced) - { - auto sleep_time = milliseconds(20); - for (auto i : intervals) { - custom_intervals_.push_back(nanoseconds{i}); - } - - PauseWorkers(); - ConfigureThreads(); - GenerateSchedule(); - ResumeWorkers(); - std::this_thread::sleep_for(sleep_time); - if (check_sequences_balanced) { - CheckSequenceBalance(); - } - StopWorkerThreads(); - } - - std::shared_ptr& parser_{LoadManager::parser_}; - std::shared_ptr& factory_{ - TestLoadManagerBase::factory_}; - - std::string& request_intervals_file_{ - CustomLoadManager::request_intervals_file_}; - NanoIntervals& custom_intervals_{CustomLoadManager::custom_intervals_}; - - cb::Error ReadTimeIntervalsFile( - const std::string& path, NanoIntervals* contents) override - { - return cb::Error::Success; - } - - private: - bool use_mock_infer_; -}; - -TEST_CASE("custom_load_schedule") -{ - PerfAnalyzerParameters params; - params.measurement_window_ms = 1000; - params.max_trials = 10; - bool is_sequence = false; - bool is_decoupled = false; - bool use_mock_infer = true; - std::vector intervals; - - const auto& ParameterizeIntervals{[&]() { - SUBCASE("intervals A") - { - intervals = {100000000, 110000000, 130000000}; - } - SUBCASE("intervals B") - { - intervals = {150000000}; - } - SUBCASE("intervals C") - { - intervals = {100000000, 110000000, 120000000, 130000000, 140000000}; - } - }}; - - const auto& ParameterizeThreads{[&]() { - SUBCASE("threads 1") - { - ParameterizeIntervals(); - params.max_threads = 1; - } - SUBCASE("threads 2") - { - ParameterizeIntervals(); - params.max_threads = 2; - } - SUBCASE("threads 4") - { - ParameterizeIntervals(); - params.max_threads = 4; - } - SUBCASE("threads 7") - { - ParameterizeIntervals(); - params.max_threads = 7; - } - }}; - - const auto& ParameterizeTrials{[&]() { - SUBCASE("trials 3") - { - ParameterizeThreads(); - params.max_trials = 3; - } - SUBCASE("trials 10") - { - ParameterizeThreads(); - params.max_trials = 10; - } - SUBCASE("trials 20") - { - ParameterizeThreads(); - params.max_trials = 20; - } - }}; - - const auto& ParameterizeMeasurementWindow{[&]() { - SUBCASE("window 1000") - { - ParameterizeTrials(); - params.measurement_window_ms = 1000; - } - SUBCASE("window 10000") - { - ParameterizeTrials(); - params.measurement_window_ms = 10000; - } - SUBCASE("window 500") - { - ParameterizeTrials(); - params.measurement_window_ms = 500; - } - }}; - - const auto& ParameterizeSequences{[&]() { - SUBCASE("sequences off") - { - ParameterizeMeasurementWindow(); - is_sequence = false; - } - SUBCASE("3 sequences") - { - ParameterizeMeasurementWindow(); - is_sequence = true; - params.num_of_sequences = 3; - } - SUBCASE("6 sequences") - { - ParameterizeMeasurementWindow(); - is_sequence = true; - params.num_of_sequences = 6; - } - SUBCASE("9 sequences") - { - ParameterizeMeasurementWindow(); - is_sequence = true; - params.num_of_sequences = 9; - } - }}; - - ParameterizeSequences(); - TestCustomLoadManager tclm(params, is_sequence, is_decoupled, use_mock_infer); - tclm.TestSchedule(intervals, params); -} - -TEST_CASE("custom_load_sequences") -{ - PerfAnalyzerParameters params; - - // This is needed so we can confirm that all sequences are being requested - // equally when serial_sequences is on. 
Otherwise we would keep creating new - // sequences and wouldn't be able to track it properly. - // - params.sequence_length = 1000; - bool is_sequence_model = true; - bool check_sequences_balanced = false; - std::vector intervals; - - const auto& ParameterizeIntervals{[&]() { - SUBCASE("intervals A") - { - intervals = {100000, 110000, 130000}; - } - SUBCASE("intervals B") - { - intervals = {150000}; - } - SUBCASE("intervals C") - { - intervals = {100000, 110000, 120000, 130000, 140000}; - } - }}; - - const auto& ParameterizeSerialSequences{[&]() { - SUBCASE("serial_sequences") - { - ParameterizeIntervals(); - params.serial_sequences = true; - check_sequences_balanced = true; - } - SUBCASE("not serial_sequences") - { - ParameterizeIntervals(); - params.serial_sequences = false; - check_sequences_balanced = false; - } - }}; - - const auto& ParameterizeNumSequences{[&]() { - SUBCASE("2 sequences") - { - ParameterizeSerialSequences(); - params.num_of_sequences = 2; - } - SUBCASE("3 sequences") - { - ParameterizeSerialSequences(); - params.num_of_sequences = 3; - } - SUBCASE("5 sequences") - { - ParameterizeSerialSequences(); - params.num_of_sequences = 5; - } - SUBCASE("6 sequences") - { - ParameterizeSerialSequences(); - params.num_of_sequences = 6; - } - SUBCASE("9 sequences") - { - ParameterizeSerialSequences(); - params.num_of_sequences = 9; - } - }}; - - - const auto& ParameterizeThreads{[&]() { - SUBCASE("threads 1") - { - ParameterizeNumSequences(); - params.max_threads = 1; - } - SUBCASE("threads 2") - { - ParameterizeNumSequences(); - params.max_threads = 2; - } - SUBCASE("threads 4") - { - ParameterizeNumSequences(); - params.max_threads = 4; - } - SUBCASE("threads 7") - { - ParameterizeNumSequences(); - params.max_threads = 7; - } - }}; - - ParameterizeThreads(); - - TestCustomLoadManager tclm(params, is_sequence_model); - tclm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - tclm.TestSequences(intervals, check_sequences_balanced); -} - - -TEST_CASE("testing the GetCustomRequestRate function") -{ - TestCustomLoadManager tclm{}; - double request_rate{0.0}; - - SUBCASE("custom_intervals_ empty") - { - cb::Error result{tclm.GetCustomRequestRate(&request_rate)}; - - CHECK(result.Err() == GENERIC_ERROR); - CHECK(result.Message() == "The custom intervals vector is empty"); - } - - SUBCASE("custom_intervals_ populated") - { - tclm.custom_intervals_.push_back(nanoseconds(100000000)); - tclm.custom_intervals_.push_back(nanoseconds(110000000)); - tclm.custom_intervals_.push_back(nanoseconds(130000000)); - - cb::Error result{tclm.GetCustomRequestRate(&request_rate)}; - - CHECK(result.Err() == SUCCESS); - CHECK(request_rate == doctest::Approx(8.0)); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_dataloader.cc b/src/c++/perf_analyzer/test_dataloader.cc deleted file mode 100644 index c8db7df66..000000000 --- a/src/c++/perf_analyzer/test_dataloader.cc +++ /dev/null @@ -1,1639 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "data_loader.h" -#include "doctest.h" -#include "mock_data_loader.h" - - -namespace triton { namespace perfanalyzer { - -/// Helper class for testing the DataLoader -/// -class TestDataLoader { - public: - // Static function to create a generic ModelTensor - // - static ModelTensor CreateTensor(std::string name) - { - ModelTensor t; - t.name_ = name; - t.datatype_ = "INT32"; - t.shape_ = {1}; - t.is_shape_tensor_ = false; - t.is_optional_ = false; - return t; - } -}; - -TEST_CASE("dataloader: no data") -{ - MockDataLoader dataloader; - CHECK(dataloader.GetDataStreamsCount() == 0); - cb::Error status = dataloader.ValidateIndexes(0, 0); - CHECK(status.IsOk() == false); -} - -TEST_CASE("dataloader: ValidateIndexes") -{ - MockDataLoader dataloader; - - // Pretend we loaded 2 streams, one with 1 step, one with 3 steps - dataloader.data_stream_cnt_ = 2; - dataloader.step_num_.push_back(1); - dataloader.step_num_.push_back(3); - - CHECK_EQ(dataloader.GetDataStreamsCount(), 2); - - // Step in range for stream 0 - cb::Error status = dataloader.ValidateIndexes(0, 0); - CHECK(status.IsOk() == true); - - // Step out of range for stream 0 - status = dataloader.ValidateIndexes(0, 1); - CHECK(status.IsOk() == false); - - // Step in range for stream 1 - status = dataloader.ValidateIndexes(1, 2); - CHECK(status.IsOk() == true); - - // Step out of range for stream 1 - status = dataloader.ValidateIndexes(1, 3); - CHECK(status.IsOk() == false); - - // Stream out of range - status = dataloader.ValidateIndexes(2, 0); - CHECK(status.IsOk() == false); -} - -TEST_CASE("dataloader: GetTotalSteps") -{ - MockDataLoader dataloader; - - // Pretend we loaded 2 streams, one with 1 step, one with 3 steps - dataloader.data_stream_cnt_ = 2; - dataloader.step_num_.push_back(1); - dataloader.step_num_.push_back(3); - - CHECK_EQ(dataloader.GetTotalSteps(0), 1); - CHECK_EQ(dataloader.GetTotalSteps(1), 3); - - // It will return 0 if out of range - CHECK_EQ(dataloader.GetTotalSteps(2), 0); -} - -TEST_CASE("dataloader: ValidateIOExistsInModel") -{ - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - 
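// Editor's note: hypothetical sketch, not the actual DataLoader code. It
// restates the (stream, step) bounds check that the ValidateIndexes and
// GetTotalSteps cases above exercise: a stream ID must be within the number
// of loaded streams and a step ID within that stream's step count.
#include <cstddef>
#include <vector>

bool IndexesValid(
    const std::vector<size_t>& steps_per_stream, size_t stream_id,
    size_t step_id)
{
  return stream_id < steps_per_stream.size() &&
         step_id < steps_per_stream[stream_id];
}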
inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - SUBCASE("Directory does not exist") - { - std::string data_directory = "non_existent_directory"; - cb::Error status = - dataloader.ValidateIOExistsInModel(inputs, outputs, data_directory); - CHECK( - status.Message() == - "Error: Directory does not exist or is not a directory: " - "non_existent_directory"); - CHECK(status.Err() == pa::GENERIC_ERROR); - } - - SUBCASE("Directory is not a directory") - { - std::string data_directory = "tmp/test.txt"; - std::ofstream file(data_directory); - cb::Error status = - dataloader.ValidateIOExistsInModel(inputs, outputs, data_directory); - CHECK( - status.Message() == - "Error: Directory does not exist or is not a directory: tmp/test.txt"); - CHECK(status.Err() == pa::GENERIC_ERROR); - std::remove(data_directory.c_str()); - } - - SUBCASE("Valid directory but no corresponding files") - { - std::string data_directory = "valid_directory"; - std::filesystem::create_directory(data_directory); - std::ofstream(data_directory + "/invalid_file").close(); - cb::Error status = - dataloader.ValidateIOExistsInModel(inputs, outputs, data_directory); - std::filesystem::remove_all(data_directory); - CHECK( - status.Message() == - "Provided data file 'invalid_file' does not correspond to a valid " - "model input or output."); - CHECK(status.Err() == pa::GENERIC_ERROR); - } - - SUBCASE("Valid directory with corresponding files") - { - std::string data_directory = "valid_directory"; - std::filesystem::create_directory(data_directory); - std::ofstream(data_directory + "/INPUT1").close(); - std::ofstream(data_directory + "/OUTPUT1").close(); - cb::Error status = - dataloader.ValidateIOExistsInModel(inputs, outputs, data_directory); - std::filesystem::remove_all(data_directory); - CHECK(status.Message().empty()); - CHECK(status.IsOk()); - } - - SUBCASE("Valid directory with multiple input and output tensors") - { - ModelTensor input2 = TestDataLoader::CreateTensor("INPUT2"); - ModelTensor output2 = TestDataLoader::CreateTensor("OUTPUT2"); - - inputs->insert(std::make_pair(input2.name_, input2)); - outputs->insert(std::make_pair(output2.name_, output2)); - - std::string data_directory = "valid_directory_multiple"; - std::filesystem::create_directory(data_directory); - std::ofstream(data_directory + "/INPUT1").close(); - std::ofstream(data_directory + "/INPUT2").close(); - std::ofstream(data_directory + "/OUTPUT1").close(); - std::ofstream(data_directory + "/OUTPUT2").close(); - - cb::Error status = - dataloader.ValidateIOExistsInModel(inputs, outputs, data_directory); - std::filesystem::remove_all(data_directory); - CHECK(status.Message().empty()); - CHECK(status.IsOk()); - } -} - -TEST_CASE("dataloader: ReadDataFromJSON") -{ - DataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - - inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - SUBCASE("File does not exist") - { - std::string json_file = "non_existent_file.json"; - cb::Error status = dataloader.ReadDataFromJSON(inputs, outputs, json_file); - CHECK(status.Message() == "failed to open file for reading provided data"); - CHECK(status.Err() == pa::GENERIC_ERROR); - } - - SUBCASE("Valid JSON file") - { - std::string json_file = "valid_file.json"; - 
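// Editor's note: hypothetical sketch, not the library's implementation. It
// summarizes the directory rule checked by the ValidateIOExistsInModel cases
// above: the path must be an existing directory and every file in it must be
// named after a model input or output tensor.
#include <filesystem>
#include <set>
#include <string>

bool DataDirectoryValid(
    const std::string& dir, const std::set<std::string>& io_names)
{
  namespace fs = std::filesystem;
  if (!fs::is_directory(dir)) {
    return false;  // "Directory does not exist or is not a directory"
  }
  for (const auto& entry : fs::directory_iterator(dir)) {
    if (io_names.count(entry.path().filename().string()) == 0) {
      return false;  // file does not correspond to a model input or output
    }
  }
  return true;
}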
std::ofstream out(json_file); - out << R"({ - "data": [ - { "INPUT1": [1] }, - { "INPUT1": [2] }, - { "INPUT1": [3] } - ], - "validation_data": [ - { "OUTPUT1": [4] }, - { "OUTPUT1": [5] }, - { "OUTPUT1": [6] } - ]})"; - out.close(); - - cb::Error status = dataloader.ReadDataFromJSON(inputs, outputs, json_file); - std::filesystem::remove(json_file); - CHECK(status.Message().empty()); - CHECK(status.IsOk()); - } - - SUBCASE("Invalid JSON file") - { - std::string json_file = "invalid_file.json"; - std::ofstream out(json_file); - out << R"({invalid_json: 1,)"; - out.close(); - - cb::Error status = dataloader.ReadDataFromJSON(inputs, outputs, json_file); - std::filesystem::remove(json_file); - - CHECK( - status.Message() == - "failed to parse the specified json file for reading provided data"); - CHECK(status.Err() == pa::GENERIC_ERROR); - } - - SUBCASE("Multiple input and output tensors") - { - ModelTensor input2 = TestDataLoader::CreateTensor("INPUT2"); - ModelTensor output2 = TestDataLoader::CreateTensor("OUTPUT2"); - - inputs->insert(std::make_pair(input2.name_, input2)); - outputs->insert(std::make_pair(output2.name_, output2)); - - std::string json_file = "valid_file_multiple_input_output.json"; - std::ofstream out(json_file); - out << R"({ - "data": [ - { - "INPUT1": [1], - "INPUT2": [4] - }, - { - "INPUT1": [2], - "INPUT2": [5] - }, - { - "INPUT1": [3], - "INPUT2": [6] - } - ], - "validation_data": [ - { - "OUTPUT1": [4], - "OUTPUT2": [7] - }, - { - "OUTPUT1": [5], - "OUTPUT2": [8] - }, - { - "OUTPUT1": [6], - "OUTPUT2": [9] - } - ] - })"; - out.close(); - - cb::Error status = dataloader.ReadDataFromJSON(inputs, outputs, json_file); - std::filesystem::remove(json_file); - CHECK(status.Message().empty()); - CHECK(status.IsOk()); - } -} - -TEST_CASE("dataloader: GetInputData missing data") -{ - MockDataLoader dataloader; - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - - TensorData data; - - cb::Error status = dataloader.GetInputData(input1, 0, 0, data); - REQUIRE(status.IsOk() == false); - CHECK_EQ(status.Message(), "unable to find data for input 'INPUT1'."); -} - -TEST_CASE("dataloader: ParseData: Bad Json") -{ - std::string json_str{"bad json text"}; - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - CHECK(status.IsOk() == false); - CHECK_EQ( - status.Message(), - "failed to parse the specified json file for reading provided data"); -} - -TEST_CASE("dataloader: ParseData: Misc error cases") -{ - std::string expected_message; - std::string json_str; - - SUBCASE("No data") - { - json_str = R"({ "notdata" : 5})"; - expected_message = "The json file doesn't contain data field"; - } - SUBCASE("Not string b64") - { - json_str = R"({"data": [{ "INPUT1": {"b64": 5} }]})"; - expected_message = - "the value of b64 field should be of type string ( Location stream id: " - "0, step id: 0)"; - } - SUBCASE("Not b64 or array") - { - json_str = R"({"data": [{ "INPUT1": {"not_b64": "AAAAAQ=="} }]})"; - expected_message = - "missing content field. 
( Location stream id: 0, step id: 0)"; - } - SUBCASE("Malformed input (boolean type)") - { - json_str = R"({"data": [{ "INPUT1": null }]})"; - expected_message = "Input data file is malformed."; - } - SUBCASE("Inconsistent elements in data array") - { - json_str = R"({"data": [ - [{ "INPUT1": [2] },{ "INPUT1": [3] }], - { "INPUT1": [1] } - ]})"; - expected_message = - "Inconsistency in input-data provided. Can not have a combination of " - "objects and arrays inside of the Data array"; - } - SUBCASE("Not integer shape") - { - json_str = R"({"data": [{ - "INPUT1": { "shape": ["a"], "content": [1,2,3,4,5,6] } - }]})"; - expected_message = "shape values must be integers."; - } - SUBCASE("Content not array") - { - json_str = R"({"data": [{ - "INPUT1": { "content": 6 } - }]})"; - expected_message = - "The tensor values are not supported. Expected an array or b64 string " - "( Location stream id: 0, step id: 0)"; - } - SUBCASE("Missing non-optional input") - { - json_str = R"({"data": [{ - "NOT_INPUT1": { "content": 6 } - }]})"; - expected_message = - "missing tensor INPUT1 ( Location stream id: 0, step id: 0)"; - } - SUBCASE("Invalid input") - { - json_str = R"({"data": - [{ - "INPUT1": [2], - "INVALID_INPUT": [2] - }] - })"; - expected_message = - "The input or output 'INVALID_INPUT' is not found in the model " - "configuration"; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - CHECK(status.IsOk() == false); - CHECK_EQ(status.Message(), expected_message); -} - -TEST_CASE( - "dataloader: ParseData: Mismatching Shapes" * - doctest::description( - "When the shape is provided and it is incompatible with the actual " - "model shape, then an error should be thrown")) -{ - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - - std::string expected_message; - std::string json_str; - - SUBCASE("Mismatching fixed shape") - { - input1.shape_ = {3}; - expected_message = - "The supplied shape of [1] for input \"INPUT1\" is incompatible with " - "the " - "model's input shape of [3]"; - - SUBCASE("content json") - { - json_str = - R"({"data": [{ "INPUT1": { "shape": [1], "content": [1] } }]})"; - } - SUBCASE("b64 json") - { - json_str = - R"({"data": [{ "INPUT1": { "shape": [1], "b64": "AAAAAQ=="} }]})"; - } - } - SUBCASE("Mismatching dynamic dimensions") - { - input1.shape_ = {-1}; - expected_message = - "The supplied shape of [1,1] for input \"INPUT1\" is incompatible with " - "the model's input shape of [-1]"; - - SUBCASE("content json") - { - json_str = - R"({"data": [{ "INPUT1": { "shape": [1,1], "content": [1] } }]})"; - } - SUBCASE("b64 json") - { - json_str = - R"({"data": [{ "INPUT1": { "shape": [1,1], "b64": "AAAAAQ=="} }]})"; - } - } - SUBCASE("Mismatching multiple dimensions") - { - input1.shape_ = {-1, 2}; - expected_message = - "The supplied shape of [1,1] for input \"INPUT1\" is incompatible with " - "the model's input shape of [-1,2]"; - - SUBCASE("content json") - { - json_str = - R"({"data": [{ "INPUT1": { "shape": [1,1], "content": [1] } }]})"; - } - SUBCASE("b64 json") - { - json_str = - R"({"data": [{ "INPUT1": { "shape": [1,1], "b64": "AAAAAQ=="} }]})"; - } - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - inputs->insert(std::make_pair(input1.name_, input1)); - - 
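// Editor's note: hypothetical sketch, not the parser's actual code. It states
// the shape-compatibility rule that the "Mismatching Shapes" subcases above
// rely on: ranks must match, and every model dimension that is not dynamic
// (-1) must equal the supplied dimension.
#include <cstddef>
#include <cstdint>
#include <vector>

bool ShapeCompatible(
    const std::vector<int64_t>& model_shape,
    const std::vector<int64_t>& supplied_shape)
{
  if (model_shape.size() != supplied_shape.size()) {
    return false;  // e.g. supplied [1,1] vs model [-1]
  }
  for (size_t i = 0; i < model_shape.size(); i++) {
    if (model_shape[i] != -1 && model_shape[i] != supplied_shape[i]) {
      return false;  // e.g. supplied [1] vs model [3]
    }
  }
  return true;
}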
std::shared_ptr outputs = std::make_shared(); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - REQUIRE(status.IsOk() == false); - CHECK_EQ(status.Message(), expected_message); -} - - -TEST_CASE( - "dataloader: ParseData: Mismatch Input Data and Fixed Shape" * - doctest::description( - "When the size of the provided Input is not in line with the Tensor's " - "shape, then an error should be thrown")) -{ - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.shape_ = {3}; - - std::string expected_message; - std::string json_str; - - SUBCASE("Normal json") - { - json_str = R"({"data": [{ "INPUT1": [1,2] }]})"; - expected_message = - "mismatch in the data provided for INPUT1. Expected: 12 bytes, Got: 8 " - "bytes"; - } - SUBCASE("content json") - { - json_str = R"({"data": [{ "INPUT1": { "content": [1,2] } }]})"; - expected_message = - "mismatch in the data provided for INPUT1. Expected: 12 bytes, Got: 8 " - "bytes"; - } - SUBCASE("b64 json") - { - json_str = R"({"data": [{ "INPUT1": {"b64": "AAAAAQ=="} }]})"; - expected_message = - "mismatch in the data provided for INPUT1. Expected: 12 bytes, Got: 4 " - "bytes"; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - inputs->insert(std::make_pair(input1.name_, input1)); - - std::shared_ptr outputs = std::make_shared(); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - REQUIRE(status.IsOk() == false); - CHECK_EQ(status.Message(), expected_message); -} - -TEST_CASE( - "dataloader: ParseData: Mismatch Input Data and Dynamic Shape" * - doctest::description( - "When the size of the provided Input is not in line with the Tensor's " - "shape, then an error should be thrown")) -{ - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.shape_ = {-1}; - - std::string expected_message; - std::string json_str; - - SUBCASE("content json") - { - json_str = - R"({"data": [{ "INPUT1": { "shape": [3], "content": [1,2] } }]})"; - expected_message = - "mismatch in the data provided for INPUT1. Expected: 12 bytes, Got: 8 " - "bytes"; - } - SUBCASE("b64 json") - { - json_str = R"({"data": [{ "INPUT1": {"shape": [3], "b64": "AAAAAQ=="} }]})"; - expected_message = - "mismatch in the data provided for INPUT1. 
Expected: 12 bytes, Got: 4 " - "bytes"; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - inputs->insert(std::make_pair(input1.name_, input1)); - - std::shared_ptr outputs = std::make_shared(); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - REQUIRE(status.IsOk() == false); - CHECK_EQ(status.Message(), expected_message); -} - -TEST_CASE( - "dataloader: ParseData: Mismatch Input and Output" * - doctest::description( - "When the size of the provided Input and validation Output data are " - "different, then an error should be thrown")) -{ - std::string json_str; - - SUBCASE("Normal json") - { - json_str = R"({ - "data": [ - { "INPUT1": [1] }, - { "INPUT1": [2] }, - { "INPUT1": [3] } - ], - "validation_data": [ - { "OUTPUT1": [7] } - ]})"; - } - SUBCASE("content json") - { - json_str = R"({ - "data": [ - { "INPUT1": { "content": [1] } }, - { "INPUT1": { "content": [2] } }, - { "INPUT1": { "content": [3] } } - ], - "validation_data": [ - { "OUTPUT1": { "content": [7] } } - ]})"; - } - SUBCASE("b64 json") - { - json_str = R"({ - "data": [ - { "INPUT1": {"b64": "AAAAAQ=="} }, - { "INPUT1": {"b64": "AgAAAA=="} }, - { "INPUT1": {"b64": "AwAAAA=="} } - ], - "validation_data": [ - { "OUTPUT1": {"b64": "BAAAAA=="} } - ]})"; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - CHECK(status.IsOk() == false); - CHECK_EQ( - status.Message(), - "The 'validation_data' field doesn't align with 'data' field in the json " - "file"); -} - -TEST_CASE("dataloader: ParseData: Valid Data") -{ - std::string json_str; - - SUBCASE("Normal json") - { - json_str = R"({ - "data": [ - { "INPUT1": [1] }, - { "INPUT1": [2] }, - { "INPUT1": [3] } - ], - "validation_data": [ - { "OUTPUT1": [4] }, - { "OUTPUT1": [5] }, - { "OUTPUT1": [6] } - ]})"; - } - SUBCASE("Content json") - { - json_str = R"({ - "data": [ - { "INPUT1": { "content": [1] } }, - { "INPUT1": { "content": [2] } }, - { "INPUT1": { "content": [3] } } - ], - "validation_data": [ - { "OUTPUT1": { "content": [4] } }, - { "OUTPUT1": { "content": [5] } }, - { "OUTPUT1": { "content": [6] } } - ]})"; - } - SUBCASE("b64 json") - { - // Note that these encoded values decode to the numbers 1,2,3,4,5,6, which - // is the same data as the normal json case above - json_str = R"({ - "data": [ - { "INPUT1": {"b64": "AAAAAQ=="} }, - { "INPUT1": {"b64": "AgAAAA=="} }, - { "INPUT1": {"b64": "AwAAAA=="} } - ], - "validation_data": [ - { "OUTPUT1": {"b64": "BAAAAA=="} }, - { "OUTPUT1": {"b64": "BQAAAA=="} }, - { "OUTPUT1": {"b64": "BgAAAA=="} } - ]})"; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - - inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 1); - CHECK_EQ(dataloader.GetTotalSteps(0), 3); - - // Confirm the correct data is in the dataloader - // - TensorData data; - std::vector shape; - - dataloader.GetInputShape(input1, 0, 1, &shape); - CHECK_EQ(shape.size(), 1); - CHECK_EQ(shape[0], 1); - - status = dataloader.GetInputData(input1, 0, 
1, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - auto input_data = *reinterpret_cast(data.data_ptr); - CHECK_EQ(input_data, 2); - CHECK_EQ(data.batch1_size, 4); - - status = dataloader.GetOutputData("OUTPUT1", 0, 2, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - auto output_data = *reinterpret_cast(data.data_ptr); - CHECK_EQ(output_data, 6); - CHECK_EQ(data.batch1_size, 4); -} - -TEST_CASE("dataloader: ParseData: Multiple Streams Invalid Cases") -{ - // Mismatch because one stream with wrong number of steps - std::string mismatch_case1a{R"({ - "data": [ { "INPUT1": [1,2] } ], - "validation_data": [ { "OUTPUT1": [4] }, { "OUTPUT1": [5] } ] - })"}; - std::string mismatch_case1b{R"({ - "data": [ { "INPUT1": [1,2] }, { "INPUT1": [2,3] } ], - "validation_data": [ { "OUTPUT1": [4] } ] - })"}; - - // Mismatch because wrong number of streams (3 output streams for 2 input - // streams) - std::string mismatch_case2{R"({ - "data": [ - [ { "INPUT1": [1,2] }, { "INPUT1": [2,3] } ], - [ { "INPUT1": [10,11] } ] - ], - "validation_data": [ - [ { "OUTPUT1": [4] }, { "OUTPUT1": [5] } ], - [ { "OUTPUT1": [40] } ], - [ { "OUTPUT1": [60] } ] - ]})"}; - - // Mismatch because same number of streams but wrong number of steps - std::string mismatch_case3a{R"({ - "data": [ - [ { "INPUT1": [1,2] } ], - [ { "INPUT1": [10,11] } ] - ], - "validation_data": [ - [ { "OUTPUT1": [4] }, { "OUTPUT1": [5] } ], - [ { "OUTPUT1": [40] } ] - ]})"}; - std::string mismatch_case3b{R"({ - "data": [ - [ { "INPUT1": [1,2] } ], - [ { "INPUT1": [10,11] } ] - ], - "validation_data": [ - [ { "OUTPUT1": [4] } ], - [ { "OUTPUT1": [40] }, { "OUTPUT1": [50] } ] - ]})"}; - - auto test_lambda = [&](std::string json_data) { - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = - std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - input1.shape_ = {2}; - inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - MockDataLoader dataloader; - cb::Error status = dataloader.ReadDataFromStr(json_data, inputs, outputs); - CHECK(status.IsOk() == false); - CHECK_EQ( - status.Message(), - "The 'validation_data' field doesn't align with 'data' field in the " - "json file"); - }; - - test_lambda(mismatch_case1a); - test_lambda(mismatch_case1b); - test_lambda(mismatch_case2); - test_lambda(mismatch_case3a); - test_lambda(mismatch_case3b); -} - -TEST_CASE("dataloader: ParseData: Multiple Streams Valid") -{ - std::string json_str{R"({ - "data": [ - [ { "INPUT1": [1,2] }, { "INPUT1": [2,3] }], - [ { "INPUT1": [10,11] } ] - ], - "validation_data": [ - [ { "OUTPUT1": [4] }, { "OUTPUT1": [5] } ], - [ { "OUTPUT1": [40] } ] - ] - })"}; - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - input1.shape_ = {2}; - inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 2); - CHECK_EQ(dataloader.GetTotalSteps(0), 2); - CHECK_EQ(dataloader.GetTotalSteps(1), 1); - - // Confirm the correct data is in the dataloader - // - TensorData data; - - status = 
dataloader.GetInputData(input1, 0, 1, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - - const int32_t* input_data = reinterpret_cast(data.data_ptr); - CHECK(data.is_valid); - CHECK_EQ(input_data[0], 2); - CHECK_EQ(input_data[1], 3); - // 2 elements of int32 data is 8 bytes - CHECK_EQ(data.batch1_size, 8); - - status = dataloader.GetOutputData("OUTPUT1", 1, 0, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - const int32_t* output_data = reinterpret_cast(data.data_ptr); - CHECK_EQ(output_data[0], 40); - CHECK_EQ(data.batch1_size, 4); -} - -TEST_CASE( - "dataloader: ParseData: Missing Shape" * - doctest::description( - "When a tensor's shape is dynamic (-1), then it needs to be provided " - "via --shape option (which is not visible to this testing), or via a " - "shape option in the json. If not, an error is thrown")) -{ - std::string json_str{R"({"data": [{ "INPUT1": [1,2,3] } ]})"}; - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.shape_ = {-1}; - - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - CHECK_EQ(status.IsOk(), false); - CHECK_EQ( - status.Message(), - "The variable-sized tensor \"INPUT1\" with model shape [-1] needs to " - "have its shape fully defined. See the --shape option."); -} - - -TEST_CASE( - "dataloader: ParseData: Supplied Shape is valid" * - doctest::description("Supply the dynamic shape for an input")) -{ - std::string json_str; - - SUBCASE("Normal json") - { - json_str = R"({"data": [{ - "INPUT1": { "shape": [3,2], "content": [1,2,3,4,5,6] } - }]})"; - } - SUBCASE("b64 json") - { - // This b64 encoding is the same as the unencoded case of [1,2,3,4,5,6] - json_str = R"({"data": [{ - "INPUT1": { "shape": [3,2], "b64": "AAAAAQAAAAIAAAADAAAABAAAAAUAAAAG" } - }]})"; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.shape_ = {-1, -1}; - - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - REQUIRE(status.IsOk()); - - std::vector shape; - dataloader.GetInputShape(input1, 0, 0, &shape); - CHECK_EQ(shape.size(), 2); - CHECK_EQ(shape[0], 3); - CHECK_EQ(shape[1], 2); -} - - -TEST_CASE( - "dataloader: ParseData: Supplied Shape is zero" * - doctest::description( - "Zero is a legal shape value and should be handled correctly. 
" - "GetInputData differentiates between an empty valid result and an " - "invalid result via the is_valid bit in the returned struct")) -{ - std::string json_str{R"({"data": [{ - "INPUT1": { "shape": [0,2], "content": [] } - }]})"}; - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.shape_ = {-1, 2}; - - ModelTensor input2 = TestDataLoader::CreateTensor("INPUT2"); - input2.is_optional_ = true; - - inputs->insert(std::make_pair(input1.name_, input1)); - inputs->insert(std::make_pair(input2.name_, input2)); - - cb::Error status = dataloader.ReadDataFromStr(json_str, inputs, outputs); - REQUIRE(status.IsOk()); - - std::vector shape; - dataloader.GetInputShape(input1, 0, 0, &shape); - CHECK_EQ(shape.size(), 2); - CHECK_EQ(shape[0], 0); - CHECK_EQ(shape[1], 2); - - // Confirm that the zero-shape input IS valid, but with size=0 and ptr=null - TensorData data; - status = dataloader.GetInputData(input1, 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - CHECK(data.data_ptr == nullptr); - CHECK(data.batch1_size == 0); - - // Confirm that the unspecified input is NOT valid - status = dataloader.GetInputData(input2, 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(!data.is_valid); - CHECK(data.data_ptr == nullptr); - CHECK(data.batch1_size == 0); -} - - -TEST_CASE( - "dataloader: ParseData: Multiple Calls simple" * - doctest::description( - "ParseData can be called multiple times (due to " - "multiple input-data files). The data should " - "accumulate in stream 0 when input data has no nested arrays")) -{ - std::string json_str1{R"({"data": [{ "INPUT1": [1] }]})"}; - std::string json_str2{R"({"data": [{ "INPUT1": [2] },{ "INPUT1": [22]}]})"}; - std::string json_str3{ - R"({"data": [{ "INPUT1": [3] }], "validation_data": [{ "OUTPUT1": [30] }]})"}; - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - - inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str1, inputs, outputs); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 1); - CHECK_EQ(dataloader.GetTotalSteps(0), 1); - - status = dataloader.ReadDataFromStr(json_str2, inputs, outputs); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 1); - CHECK_EQ(dataloader.GetTotalSteps(0), 3); - - status = dataloader.ReadDataFromStr(json_str3, inputs, outputs); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 1); - CHECK_EQ(dataloader.GetTotalSteps(0), 4); - - // Confirm the correct data is in the dataloader - // - TensorData data; - - status = dataloader.GetInputData(input1, 0, 3, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - - const int32_t* input_data = reinterpret_cast(data.data_ptr); - CHECK_EQ(input_data[0], 3); - CHECK_EQ(data.batch1_size, 4); - - // Confirm that only one of the 4 steps has output data - // - status = dataloader.GetOutputData("OUTPUT1", 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(!data.is_valid); - status = dataloader.GetOutputData("OUTPUT1", 0, 1, data); - REQUIRE(status.IsOk()); - CHECK(!data.is_valid); - status = dataloader.GetOutputData("OUTPUT1", 0, 2, data); - 
REQUIRE(status.IsOk()); - CHECK(!data.is_valid); - status = dataloader.GetOutputData("OUTPUT1", 0, 3, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - CHECK(data.data_ptr != nullptr); - CHECK(data.batch1_size == 4); -} - -TEST_CASE( - "dataloader: ParseData: Multiple Calls array" * - doctest::description( - "ParseData can be called multiple times (due to " - "multiple input-data files). The data should " - "accumulate as multiple streams when input data has nested arrays")) -{ - std::string json_str1{R"({"data": [[{ "INPUT1": [1] }]]})"}; - std::string json_str2{ - R"({"data": [[{ "INPUT1": [2] },{ "INPUT1": [20] }]]})"}; - std::string json_str3{ - R"({"data": [[{ "INPUT1": [3] }]], "validation_data": [[{ "OUTPUT1": [30] }]]})"}; - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - - inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str1, inputs, outputs); - REQUIRE(status.IsOk()); - status = dataloader.ReadDataFromStr(json_str2, inputs, outputs); - REQUIRE(status.IsOk()); - status = dataloader.ReadDataFromStr(json_str3, inputs, outputs); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 3); - CHECK_EQ(dataloader.GetTotalSteps(0), 1); - CHECK_EQ(dataloader.GetTotalSteps(1), 2); - CHECK_EQ(dataloader.GetTotalSteps(2), 1); - - // Confirm the correct data is in the dataloader - // - TensorData data; - - status = dataloader.GetInputData(input1, 1, 1, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - - const int32_t* input_data = reinterpret_cast(data.data_ptr); - CHECK_EQ(input_data[0], 20); - CHECK_EQ(data.batch1_size, 4); - - // Confirm that only one of the 3 streams has output data - // - status = dataloader.GetOutputData("OUTPUT1", 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(!data.is_valid); - status = dataloader.GetOutputData("OUTPUT1", 1, 0, data); - REQUIRE(status.IsOk()); - CHECK(!data.is_valid); - status = dataloader.GetOutputData("OUTPUT1", 2, 0, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - CHECK(data.data_ptr != nullptr); - CHECK(data.batch1_size == 4); -} - -TEST_CASE( - "dataloader: ParseData: Multiple Calls mixed" * - doctest::description( - "ParseData can be called multiple times (due to " - "multiple input-data files). 
An error should be thrown if there is a " - "mixture of nested vs no-nested arrays in the input data")) -{ - std::string json_str_not_nested{R"({"data": [{ "INPUT1": [2] }]})"}; - std::string json_str_nested{R"({"data": [[{ "INPUT1": [1] }]]})"}; - std::string json_str1, json_str2; - - SUBCASE("Nested then not-nested") - { - json_str1 = json_str_nested; - json_str2 = json_str_not_nested; - } - SUBCASE("Not-nested then nested") - { - json_str1 = json_str_not_nested; - json_str2 = json_str_nested; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = dataloader.ReadDataFromStr(json_str1, inputs, outputs); - REQUIRE(status.IsOk()); - status = dataloader.ReadDataFromStr(json_str2, inputs, outputs); - REQUIRE(!status.IsOk()); - CHECK( - status.Message() == - "Inconsistency in input-data provided. Can not have a combination of " - "objects and arrays inside of the Data array"); -} - -TEST_CASE( - "dataloader: GenerateData: Is Shape Tensor" * - doctest::description("It is illegal to generate data for any Tensor with " - "is_shape_tensor=True")) -{ - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.is_shape_tensor_ = true; - inputs->insert(std::make_pair(input1.name_, input1)); - - bool zero_input = true; - size_t string_length = 5; - std::string string_data = "FOOBAR"; - cb::Error status = - dataloader.GenerateData(inputs, zero_input, string_length, string_data); - CHECK(status.IsOk() == false); - CHECK_EQ( - status.Message(), - "can not generate data for shape tensor 'INPUT1', user-provided data is " - "needed."); -} - - -TEST_CASE( - "dataloader: GenerateData: Non-BYTES" * - doctest::description( - "Calling GenerateData for non-BYTES datatype should result in a single " - "stream with one step. If the zero input flag is set, all of that data " - "will be 0. Else it will be random")) -{ - bool zero_input; - size_t string_length = 5; - std::string string_data = "FOOBAR"; - - SUBCASE("zero_input true") - { - zero_input = true; - } - SUBCASE("zero_input false") - { - zero_input = false; - } - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.shape_ = {3}; - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = - dataloader.GenerateData(inputs, zero_input, string_length, string_data); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 1); - CHECK_EQ(dataloader.GetTotalSteps(0), 1); - - TensorData data; - - status = dataloader.GetInputData(input1, 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - const int32_t* input_data = reinterpret_cast(data.data_ptr); - if (zero_input) { - CHECK_EQ(input_data[0], 0); - CHECK_EQ(input_data[1], 0); - CHECK_EQ(input_data[2], 0); - } else { - CHECK_NE(input_data[0], 0); - CHECK_NE(input_data[1], 0); - CHECK_NE(input_data[2], 0); - } - // 3 elements of int32 data is 12 bytes - CHECK_EQ(data.batch1_size, 12); -} - -TEST_CASE( - "dataloader: GenerateData: BYTES" * - doctest::description( - "Calling GenerateData for BYTES datatype should result in a single " - "stream with one step. 
The zero-input flag is ignored. If string_data " - "is not null, it will be used. Else it will be a random string of " - "length string_length")) -{ - bool zero_input = false; - size_t string_length = 5; - std::string string_data; - - SUBCASE("valid string_data") - { - string_data = "FOOBAR"; - } - SUBCASE("empty string_data") - { - string_data = ""; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.datatype_ = "BYTES"; - input1.shape_ = {3}; - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = - dataloader.GenerateData(inputs, zero_input, string_length, string_data); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 1); - CHECK_EQ(dataloader.GetTotalSteps(0), 1); - - TensorData data; - - status = dataloader.GetInputData(input1, 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - - // For string data, the result should be a 32-bit number indicating the data - // length, and then 1 byte per letter - // - // For "FOOBAR", the length would be 10 bytes: - // 4 bytes to indicate the string length (the number 6) - // 1 byte for each letter - // - // For empty string, the string length would instead be the value in - // string_length (5 in this case), and the characters would be random for - // each entry in the batch. Thus, the data length would be 9 bytes - // - // For a shape of [3], this data would be repeated 3 times - - if (string_data.empty()) { - // 3 elements of 9 bytes is 27 - CHECK_EQ(data.batch1_size, 27); - - const char* char_data = reinterpret_cast(data.data_ptr); - - // Check all 3 entries in the "batch" of shape [3] - for (size_t i = 0; i < 3; i++) { - size_t start_index = 9 * i; - - // The first 4 bytes are an int32 indicating the number of characters - const int32_t* int32_data = - reinterpret_cast(&char_data[start_index]); - CHECK_EQ(int32_data[0], 5); - - // All of the characters should be in the specified character_set - for (size_t j = start_index + 4; j < start_index + 9; j++) { - CHECK_NE(character_set.find(char_data[j]), std::string::npos); - } - } - - } else { - // 3 elements of 10 bytes is 30 - CHECK_EQ(data.batch1_size, 30); - - const int32_t* int32_data = reinterpret_cast(data.data_ptr); - const char* char_data = reinterpret_cast(data.data_ptr); - CHECK_EQ(int32_data[0], 6); - CHECK_EQ(char_data[4], 'F'); - CHECK_EQ(char_data[5], 'O'); - CHECK_EQ(char_data[6], 'O'); - CHECK_EQ(char_data[7], 'B'); - CHECK_EQ(char_data[8], 'A'); - CHECK_EQ(char_data[9], 'R'); - - // The data would repeat two more times for shape of [3] - for (size_t i = 10; i < 30; i++) { - CHECK_EQ(char_data[i - 10], char_data[i]); - } - } -} - -TEST_CASE("dataloader: GenerateData: Dynamic shape") -{ - bool zero_input = false; - size_t string_length = 5; - std::string string_data; - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - input1.shape_ = {-1}; - - std::string expected_message = - "input INPUT1 contains dynamic shape, provide shapes to send along with " - "the request"; - - SUBCASE("BYTES") - { - input1.datatype_ = "BYTES"; - } - SUBCASE("non-BYTES") - { - input1.datatype_ = "INT32"; - } - - MockDataLoader dataloader; - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = - dataloader.GenerateData(inputs, zero_input, string_length, string_data); - 
REQUIRE(status.IsOk() == false); - CHECK_EQ(status.Message(), expected_message); -} - -TEST_CASE( - "dataloader: ReadDataFromDir: Error reading input file" * - doctest::description( - "When there is an error reading an input data file, the error should " - "bubble up to the return value of ReadDataFromDir")) -{ - MockDataLoader dataloader; - - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - - std::string dir{"fake/path"}; - - SUBCASE("BYTES (string) data") - { - input1.datatype_ = "BYTES"; - } - SUBCASE("Raw Binary data") - { - input1.datatype_ = "INT32"; - } - - inputs->insert(std::make_pair(input1.name_, input1)); - cb::Error status = dataloader.ReadDataFromDir(inputs, outputs, dir); - CHECK(status.IsOk() == false); -} - -TEST_CASE( - "dataloader: ReadDataFromDir: Error reading output file" * - doctest::description( - "When there is an error reading an output data file, an error is NOT " - "raised from ReadDataFromDir, and instead GetOutputData will return " - "nullptr with a batch1_size of 0")) -{ - MockDataLoader dataloader; - - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - - std::string dir{"fake/path"}; - - SUBCASE("BYTES (string) data") - { - output1.datatype_ = "BYTES"; - } - SUBCASE("Raw Binary data") - { - output1.datatype_ = "INT32"; - } - - outputs->insert(std::make_pair(output1.name_, output1)); - cb::Error status = dataloader.ReadDataFromDir(inputs, outputs, dir); - CHECK(status.IsOk() == true); - - TensorData data; - - dataloader.GetOutputData("OUTPUT1", 0, 0, data); - CHECK(!data.is_valid); - CHECK(data.data_ptr == nullptr); - CHECK(data.batch1_size == 0); -} - -TEST_CASE( - "dataloader: ReadDataFromDir: Mismatching Input Data" * - doctest::description("Successfully reading input files but having a " - "mismatch will result in an error being thrown")) -{ - MockDataLoader dataloader; - - std::string datatype; - std::string expected_error_message; - - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - - std::string dir{"mocked_out"}; - - SUBCASE("BYTES (string) data") - { - datatype = "BYTES"; - std::vector string_data; - - SUBCASE("Dynamic shape") - { - input1.shape_ = {-1}; - expected_error_message = - "input INPUT1 contains dynamic shape, provide shapes to send along " - "with the request"; - } - SUBCASE("Supplied shape") - { - input1.shape_ = {1}; - string_data = {"InStr", "ExtraStr"}; - - expected_error_message = - "provided data for input INPUT1 has 2 elements, expect 1"; - } - - EXPECT_CALL(dataloader, ReadTextFile(testing::_, testing::_)) - .WillOnce(testing::DoAll( - testing::SetArgPointee<1>(string_data), - testing::Return(cb::Error::Success))); - } - SUBCASE("Raw Binary data") - { - datatype = "INT32"; - std::vector char_data; - - SUBCASE("Dynamic shape") - { - input1.shape_ = {-1}; - expected_error_message = - "input INPUT1 contains dynamic shape, provide shapes to send along " - "with the request"; - } - SUBCASE("Supplied shape") - { - // An INT32 of shape {1} will be 4 bytes. However, we are supplying 5 - // bytes via char_data. 
- input1.shape_ = {1}; - char_data = {'0', '0', '0', '7', '5'}; - expected_error_message = - "provided data for input INPUT1 has byte size 5, expect 4"; - } - - EXPECT_CALL(dataloader, ReadFile(testing::_, testing::_)) - .WillOnce(testing::DoAll( - testing::SetArgPointee<1>(char_data), - testing::Return(cb::Error::Success))); - } - - input1.datatype_ = datatype; - inputs->insert(std::make_pair(input1.name_, input1)); - - cb::Error status = dataloader.ReadDataFromDir(inputs, outputs, dir); - REQUIRE(status.IsOk() == false); - CHECK(status.Message() == expected_error_message); -} - -// FIXME TMA-1210 -- the output data is not being ignored here and no error is -// thrown, despite the mismatch -// TEST_CASE( -// "dataloader: ReadDataFromDir: Mismatching Output Data" * -// doctest::description("Successfully reading output files but having a " -// "mismatch will result in the data being ignored")) -//{ -// MockDataLoader dataloader; -// -// std::string datatype; -// -// std::shared_ptr inputs = std::make_shared(); -// std::shared_ptr outputs = -// std::make_shared(); -// -// ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); -// ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); -// -// std::string dir{"mocked_out"}; -// -// std::vector char_data{'0', '0', '0', '7', '5'}; -// -// std::vector string_data{"InStr", "ExtraStr"}; -// -// SUBCASE("BYTES (string) data") -// { -// datatype = "BYTES"; -// EXPECT_CALL(dataloader, ReadTextFile(testing::_, testing::_)) -// .WillOnce(testing::DoAll( -// testing::SetArgPointee<1>(string_data), -// testing::Return(cb::Error::Success))); -// -// SUBCASE("Dynamic shape") { output1.shape_ = {-1}; } -// SUBCASE("Supplied shape") { output1.shape_ = {1}; } -// } -// SUBCASE("Raw Binary data") -// { -// datatype = "INT32"; -// EXPECT_CALL(dataloader, ReadFile(testing::_, testing::_)) -// .WillOnce(testing::DoAll( -// testing::SetArgPointee<1>(char_data), -// testing::Return(cb::Error::Success))); -// -// SUBCASE("Dynamic shape") { input1.shape_ = {-1}; } -// SUBCASE("Supplied shape") { input1.shape_ = {1}; } -// } -// -// output1.datatype_ = datatype; -// outputs->insert(std::make_pair(output1.name_, output1)); -// -// cb::Error status = dataloader.ReadDataFromDir(inputs, outputs, dir); -// REQUIRE(status.IsOk() == true); -// -// // Confirm that the data is not in the dataloader -// const uint8_t* data_ptr{nullptr}; -// size_t batch1_size; -// -// dataloader.GetOutputData("OUTPUT1", 0, 0, &data_ptr, &batch1_size); -// CHECK(data_ptr == nullptr); -// CHECK(batch1_size == 0); -//} - -TEST_CASE( - "dataloader: ReadDataFromDir: Valid Data" * - doctest::description("Successfully reading files will always result in a " - "single stream with a single step")) -{ - MockDataLoader dataloader; - - std::string datatype; - - std::shared_ptr inputs = std::make_shared(); - std::shared_ptr outputs = std::make_shared(); - - ModelTensor input1 = TestDataLoader::CreateTensor("INPUT1"); - ModelTensor output1 = TestDataLoader::CreateTensor("OUTPUT1"); - - std::string dir{"mocked_out"}; - - std::vector input_char_data{'0', '0', '0', '7'}; - std::vector output_char_data{'0', '0', '0', '3'}; - - std::vector input_string_data{"InStr"}; - std::vector output_string_data{"OutStr"}; - - std::vector expected_input; - std::vector expected_output; - - SUBCASE("BYTES (string) data") - { - datatype = "BYTES"; - - expected_input = {'\5', '\0', '\0', '\0', 'I', 'n', 'S', 't', 'r'}; - expected_output = {'\6', '\0', '\0', '\0', 'O', 'u', 't', 'S', 't', 'r'}; - - 
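The expected_input/expected_output vectors just above use the length-prefixed BYTES layout already described in the GenerateData: BYTES test: a 4-byte length followed by the raw characters. As a reading aid, a minimal sketch of that encoding (helper name hypothetical, not from the perf_analyzer sources; assumes a little-endian host so the memcpy of the length reproduces the prefix bytes):

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Hypothetical helper: length-prefixed BYTES element, 4-byte length then payload.
std::vector<char> EncodeBytesElement(const std::string& s)
{
  std::vector<char> buf(sizeof(uint32_t) + s.size());
  const uint32_t len = static_cast<uint32_t>(s.size());
  std::memcpy(buf.data(), &len, sizeof(len));                 // "InStr" -> 5, 0, 0, 0
  std::memcpy(buf.data() + sizeof(len), s.data(), s.size());  // then 'I', 'n', 'S', 't', 'r'
  return buf;
}

EncodeBytesElement("InStr") yields {5, 0, 0, 0, 'I', 'n', 'S', 't', 'r'}, matching expected_input above.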
EXPECT_CALL(dataloader, ReadTextFile(testing::_, testing::_)) - .WillOnce(testing::DoAll( - testing::SetArgPointee<1>(input_string_data), - testing::Return(cb::Error::Success))) - .WillOnce(testing::DoAll( - testing::SetArgPointee<1>(output_string_data), - testing::Return(cb::Error::Success))); - } - SUBCASE("Raw Binary data") - { - datatype = "INT32"; - - expected_input = input_char_data; - expected_output = output_char_data; - - EXPECT_CALL(dataloader, ReadFile(testing::_, testing::_)) - .WillOnce(testing::DoAll( - testing::SetArgPointee<1>(input_char_data), - testing::Return(cb::Error::Success))) - .WillOnce(testing::DoAll( - testing::SetArgPointee<1>(output_char_data), - testing::Return(cb::Error::Success))); - } - - input1.datatype_ = datatype; - output1.datatype_ = datatype; - - inputs->insert(std::make_pair(input1.name_, input1)); - outputs->insert(std::make_pair(output1.name_, output1)); - - cb::Error status = dataloader.ReadDataFromDir(inputs, outputs, dir); - REQUIRE(status.IsOk()); - CHECK_EQ(dataloader.GetDataStreamsCount(), 1); - CHECK_EQ(dataloader.GetTotalSteps(0), 1); - - // Validate input and output data - TensorData data; - - status = dataloader.GetInputData(input1, 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - - const char* input_data = reinterpret_cast(data.data_ptr); - REQUIRE(data.batch1_size == expected_input.size()); - for (size_t i = 0; i < data.batch1_size; i++) { - CHECK(input_data[i] == expected_input[i]); - } - - status = dataloader.GetOutputData("OUTPUT1", 0, 0, data); - REQUIRE(status.IsOk()); - CHECK(data.is_valid); - - const char* output_data = reinterpret_cast(data.data_ptr); - REQUIRE(data.batch1_size == expected_output.size()); - for (size_t i = 0; i < data.batch1_size; i++) { - CHECK(output_data[i] == expected_output[i]); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_idle_timer.cc b/src/c++/perf_analyzer/test_idle_timer.cc deleted file mode 100644 index 18f9d7518..000000000 --- a/src/c++/perf_analyzer/test_idle_timer.cc +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <thread>
-
-#include "doctest.h"
-#include "idle_timer.h"
-
-namespace triton { namespace perfanalyzer {
-
-TEST_CASE("idle_timer: basic usage")
-{
-  IdleTimer timer;
-  CHECK(timer.GetIdleTime() == 0);
-  timer.Start();
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  timer.Stop();
-  CHECK(timer.GetIdleTime() > 0);
-  timer.Reset();
-  CHECK(timer.GetIdleTime() == 0);
-}
-
-TEST_CASE("idle_timer: GetIdleTime when inactive")
-{
-  IdleTimer timer;
-  CHECK(timer.GetIdleTime() == 0);
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  CHECK(timer.GetIdleTime() == 0);
-  CHECK_NOTHROW(timer.Start());
-}
-
-TEST_CASE("idle_timer: GetIdleTime when active")
-{
-  IdleTimer timer;
-  timer.Start();
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  CHECK(timer.GetIdleTime() > 0);
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  CHECK(timer.GetIdleTime() > 0);
-  CHECK_NOTHROW(timer.Stop());
-}
-
-TEST_CASE("idle_timer: reset when active")
-{
-  IdleTimer timer;
-  timer.Start();
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  timer.Stop();
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  timer.Start();
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  timer.Reset();
-  std::this_thread::sleep_for(std::chrono::milliseconds(1));
-  CHECK(timer.GetIdleTime() > 0);
-}
-
-TEST_CASE("idle_timer: double start")
-{
-  IdleTimer timer;
-  timer.Start();
-  CHECK_THROWS_AS(timer.Start(), const std::exception&);
-}
-
-TEST_CASE("idle_timer: stop without start")
-{
-  IdleTimer timer;
-  CHECK_THROWS_AS(timer.Stop(), const std::exception&);
-}
-
-
-}} // namespace triton::perfanalyzer
diff --git a/src/c++/perf_analyzer/test_infer_context.cc b/src/c++/perf_analyzer/test_infer_context.cc
deleted file mode 100644
index 951fb2b10..000000000
--- a/src/c++/perf_analyzer/test_infer_context.cc
+++ /dev/null
@@ -1,178 +0,0 @@
-// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-// * Neither the name of NVIDIA CORPORATION nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "client_backend/mock_client_backend.h" -#include "doctest.h" -#include "gmock/gmock.h" -#include "infer_context.h" -#include "mock_data_loader.h" -#include "mock_infer_context.h" -#include "mock_infer_data_manager.h" -#include "mock_sequence_manager.h" - -namespace triton { namespace perfanalyzer { - -/// Tests the round robin ordering of json input data -/// -TEST_CASE("update_seq_json_data: testing the UpdateSeqJsonData function") -{ - std::shared_ptr mock_sequence_manager{ - std::make_shared()}; - - EXPECT_CALL( - *mock_sequence_manager, SetInferSequenceOptions(testing::_, testing::_)) - .Times(6) - .WillRepeatedly(testing::Return()); - - mock_sequence_manager->InitSequenceStatuses(1); - - std::shared_ptr mock_data_loader{ - std::make_shared()}; - - EXPECT_CALL(*mock_data_loader, GetTotalSteps(testing::_)) - .Times(6) - .WillRepeatedly(testing::Return(3)); - - std::shared_ptr mock_infer_data_manager{ - std::make_shared()}; - - testing::Sequence seq; - EXPECT_CALL( - *mock_infer_data_manager, - UpdateInferData(testing::_, testing::_, 0, testing::_)) - .InSequence(seq) - .WillOnce(testing::Return(cb::Error::Success)); - EXPECT_CALL( - *mock_infer_data_manager, - UpdateInferData(testing::_, testing::_, 1, testing::_)) - .InSequence(seq) - .WillOnce(testing::Return(cb::Error::Success)); - EXPECT_CALL( - *mock_infer_data_manager, - UpdateInferData(testing::_, testing::_, 2, testing::_)) - .InSequence(seq) - .WillOnce(testing::Return(cb::Error::Success)); - EXPECT_CALL( - *mock_infer_data_manager, - UpdateInferData(testing::_, testing::_, 0, testing::_)) - .InSequence(seq) - .WillOnce(testing::Return(cb::Error::Success)); - EXPECT_CALL( - *mock_infer_data_manager, - UpdateInferData(testing::_, testing::_, 1, testing::_)) - .InSequence(seq) - .WillOnce(testing::Return(cb::Error::Success)); - EXPECT_CALL( - *mock_infer_data_manager, - UpdateInferData(testing::_, testing::_, 2, testing::_)) - .InSequence(seq) - .WillOnce(testing::Return(cb::Error::Success)); - - std::shared_ptr mic{std::make_shared()}; - - EXPECT_CALL(*mic, SendRequest(testing::_, testing::_, testing::_)) - .Times(6) - .WillRepeatedly(testing::Return()); - - mic->sequence_manager_ = mock_sequence_manager; - mic->data_loader_ = mock_data_loader; - mic->infer_data_manager_ = mock_infer_data_manager; - mic->thread_stat_ = std::make_shared(); - bool execute{true}; - mic->execute_ = execute; - mic->using_json_data_ = true; - - size_t seq_stat_index{0}; - bool delayed{false}; - - mic->SendSequenceInferRequest(seq_stat_index, delayed); - mic->SendSequenceInferRequest(seq_stat_index, delayed); - mic->SendSequenceInferRequest(seq_stat_index, delayed); - mic->SendSequenceInferRequest(seq_stat_index, delayed); - mic->SendSequenceInferRequest(seq_stat_index, delayed); - mic->SendSequenceInferRequest(seq_stat_index, delayed); - - // Destruct gmock objects to determine gmock-related test failure - mock_sequence_manager.reset(); - mock_data_loader.reset(); - mock_infer_data_manager.reset(); - 
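The gmock Sequence above pins UpdateInferData to step indices 0, 1, 2, 0, 1, 2 across the six sequence requests; that ordering reduces to a modulo walk over the three JSON data steps. A tiny self-contained check of that expectation (stand-alone sketch, not part of the test):

#include <cassert>
#include <cstddef>

int main()
{
  const std::size_t total_steps = 3;                  // GetTotalSteps() in the mock above
  const std::size_t expected[] = {0, 1, 2, 0, 1, 2};  // order pinned by the gmock Sequence
  for (std::size_t request = 0; request < 6; ++request) {
    assert(request % total_steps == expected[request]);  // round-robin over data steps
  }
  return 0;
}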
mic.reset(); - REQUIRE(testing::Test::HasFailure() == false); -} - -TEST_CASE("send_request: testing the SendRequest function") -{ - MockInferContext mock_infer_context{}; - - SUBCASE("testing logic relevant to request record sequence ID") - { - mock_infer_context.thread_stat_ = std::make_shared(); - mock_infer_context.thread_stat_->contexts_stat_.emplace_back(); - mock_infer_context.async_ = true; - mock_infer_context.streaming_ = true; - mock_infer_context.infer_data_.options_ = - std::make_unique("my_model"); - std::shared_ptr mock_client_stats{ - std::make_shared()}; - mock_infer_context.infer_backend_ = - std::make_unique(mock_client_stats); - - const uint64_t request_id{5}; - const bool delayed{false}; - const uint64_t sequence_id{2}; - - mock_infer_context.infer_data_.options_->request_id_ = - std::to_string(request_id); - - cb::MockInferResult* mock_infer_result{ - new cb::MockInferResult(*mock_infer_context.infer_data_.options_)}; - - cb::OnCompleteFn& stream_callback{mock_infer_context.async_callback_func_}; - - EXPECT_CALL( - dynamic_cast( - *mock_infer_context.infer_backend_), - AsyncStreamInfer(testing::_, testing::_, testing::_)) - .WillOnce( - [&mock_infer_result, &stream_callback]( - const cb::InferOptions& options, - const std::vector& inputs, - const std::vector& outputs) - -> cb::Error { - stream_callback(mock_infer_result); - return cb::Error::Success; - }); - - mock_infer_context.SendRequest(request_id, delayed, sequence_id); - - CHECK(mock_infer_context.thread_stat_->request_records_.size() == 1); - CHECK( - mock_infer_context.thread_stat_->request_records_[0].sequence_id_ == - sequence_id); - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_inference_profiler.cc b/src/c++/perf_analyzer/test_inference_profiler.cc deleted file mode 100644 index 2941867fc..000000000 --- a/src/c++/perf_analyzer/test_inference_profiler.cc +++ /dev/null @@ -1,1132 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
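The CheckWindowForStability vectors in the tests below are consistent with a simple max-versus-min rule: a window is stable when the largest of the last `stability_window` measurements exceeds the smallest by no more than `stability_threshold`, for both throughput and latency (so {500, 520, 510} passes at 0.1 while {100, 106, 112} fails). A minimal sketch of that reading, not the original implementation:

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical stability rule consistent with the test vectors below.
bool WindowIsStable(
    const std::vector<double>& measurements, std::size_t window, double threshold)
{
  if (measurements.size() < window) {
    return false;  // matches the "underflow" subcase: not enough measurements yet
  }
  const auto begin = measurements.end() - window;
  const double lo = *std::min_element(begin, measurements.end());
  const double hi = *std::max_element(begin, measurements.end());
  return lo > 0.0 && hi <= lo * (1.0 + threshold);  // zero throughput is never stable
}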
- -#include "doctest.h" -#include "inference_profiler.h" -#include "mock_inference_profiler.h" -#include "mock_load_manager.h" -#include "mock_model_parser.h" - -namespace triton { namespace perfanalyzer { - -class TestInferenceProfiler : public InferenceProfiler { - public: - static void ValidLatencyMeasurement( - const std::pair& valid_range, - size_t& valid_sequence_count, size_t& delayed_request_count, - std::vector* latencies, size_t& response_count, - std::vector& valid_requests, - std::vector& all_request_records) - { - InferenceProfiler inference_profiler{}; - inference_profiler.all_request_records_ = all_request_records; - inference_profiler.ValidLatencyMeasurement( - valid_range, valid_sequence_count, delayed_request_count, latencies, - response_count, valid_requests); - } - - static std::tuple GetMeanAndStdDev( - const std::vector& latencies) - { - InferenceProfiler inference_profiler{}; - return inference_profiler.GetMeanAndStdDev(latencies); - } - - void SummarizeSendRequestRate( - const double window_duration_s, const size_t num_sent_requests, - PerfStatus& summary) - { - InferenceProfiler::SummarizeSendRequestRate( - window_duration_s, num_sent_requests, summary); - } - - static bool TestCheckWithinThreshold( - LoadStatus& ls, LoadParams& lp, uint64_t latency_threshold_ms) - { - InferenceProfiler ip; - size_t idx = ls.infer_per_sec.size() - lp.stability_window; - ip.latency_threshold_ms_ = latency_threshold_ms; - - return ip.CheckWithinThreshold(idx, ls); - } - - static bool TestCheckWindowForStability(LoadStatus& ls, LoadParams& lp) - { - size_t idx = ls.infer_per_sec.size() - lp.stability_window; - - InferenceProfiler ip; - ip.load_parameters_.stability_threshold = lp.stability_threshold; - ip.load_parameters_.stability_window = lp.stability_window; - - return ip.CheckWindowForStability(idx, ls, true); - }; - - static bool TestDetermineStability( - LoadStatus& ls, LoadParams& lp, bool check_latency = true) - { - InferenceProfiler ip; - ip.load_parameters_.stability_threshold = lp.stability_threshold; - ip.load_parameters_.stability_window = lp.stability_window; - - return ip.DetermineStability(ls, check_latency); - } - - static bool TestIsDoneProfiling( - LoadStatus& ls, LoadParams& lp, uint64_t latency_threshold_ms) - { - InferenceProfiler ip; - ip.load_parameters_.stability_threshold = lp.stability_threshold; - ip.load_parameters_.stability_window = lp.stability_window; - ip.latency_threshold_ms_ = latency_threshold_ms; - ip.mpi_driver_ = std::make_shared(false); - - bool is_stable = ip.DetermineStability(ls); - return ip.IsDoneProfiling(ls, &is_stable); - }; - - std::pair ClampWindow(std::vector& reqs) - { - return InferenceProfiler::ClampWindow(reqs); - } - - cb::Error MergeMetrics( - const std::vector>& all_metrics, - Metrics& merged_metrics) - { - return InferenceProfiler::MergeMetrics(all_metrics, merged_metrics); - } - - template - void GetMetricAveragePerGPU( - const std::vector>>& - input_metric_maps, - std::map& output_metric_map) - { - InferenceProfiler::GetMetricAveragePerGPU( - input_metric_maps, output_metric_map); - } - - template - void GetMetricMaxPerGPU( - const std::vector>>& - input_metric_maps, - std::map& output_metric_map) - { - InferenceProfiler::GetMetricMaxPerGPU( - input_metric_maps, output_metric_map); - } - - template - void GetMetricFirstPerGPU( - const std::vector>>& - input_metric_maps, - std::map& output_metric_map) - { - InferenceProfiler::GetMetricFirstPerGPU( - input_metric_maps, output_metric_map); - } - - void SummarizeOverhead( - 
const uint64_t window_duration_ns, const uint64_t idle_ns, - PerfStatus& summary) - { - InferenceProfiler::SummarizeOverhead(window_duration_ns, idle_ns, summary); - } - - - cb::Error DetermineStatsModelVersion( - const cb::ModelIdentifier& model_identifier, - const std::map& start_stats, - const std::map& end_stats, - int64_t* model_version) - { - return InferenceProfiler::DetermineStatsModelVersion( - model_identifier, start_stats, end_stats, model_version); - } - - cb::Error SetTopLevelResponseCaching(bool enable_top_level_response_caching) - { - return InferenceProfiler::SetTopLevelResponseCaching( - enable_top_level_response_caching); - } -}; - - -TEST_CASE("testing the ValidLatencyMeasurement function") -{ - size_t valid_sequence_count{}; - size_t delayed_request_count{}; - std::vector latencies{}; - size_t response_count{}; - std::vector valid_requests{}; - - const std::pair window{4, 17}; - using time_point = std::chrono::time_point; - using ns = std::chrono::nanoseconds; - std::vector all_request_records{ - // request ends before window starts, this should not be possible to exist - // in the vector of requests, but if it is, we exclude it: not included in - // current window - RequestRecord( - time_point(ns(1)), std::vector{time_point(ns(2))}, {}, {}, - 0, false, 0, false), - - // request starts before window starts and ends inside window: included in - // current window - RequestRecord( - time_point(ns(3)), std::vector{time_point(ns(5))}, {}, {}, - 0, false, 0, false), - - // requests start and end inside window: included in current window - RequestRecord( - time_point(ns(6)), std::vector{time_point(ns(9))}, {}, {}, - 0, false, 0, false), - RequestRecord( - time_point(ns(10)), std::vector{time_point(ns(14))}, {}, - {}, 0, false, 0, false), - - // request starts before window ends and ends after window ends: not - // included in current window - RequestRecord( - time_point(ns(15)), std::vector{time_point(ns(20))}, {}, - {}, 0, false, 0, false), - - // request starts after window ends: not included in current window - RequestRecord( - time_point(ns(21)), std::vector{time_point(ns(27))}, {}, - {}, 0, false, 0, false)}; - - TestInferenceProfiler::ValidLatencyMeasurement( - window, valid_sequence_count, delayed_request_count, &latencies, - response_count, valid_requests, all_request_records); - - const auto& convert_request_record_to_latency{[](RequestRecord t) { - return CHRONO_TO_NANOS(t.response_timestamps_.back()) - - CHRONO_TO_NANOS(t.start_time_); - }}; - - CHECK(latencies.size() == 3); - CHECK( - latencies[0] == - convert_request_record_to_latency(all_request_records[1])); - CHECK( - latencies[1] == - convert_request_record_to_latency(all_request_records[2])); - CHECK( - latencies[2] == - convert_request_record_to_latency(all_request_records[3])); -} - -TEST_CASE("test_check_window_for_stability") -{ - LoadStatus ls; - LoadParams lp; - - SUBCASE("test throughput not stable") - { - ls.infer_per_sec = {1.0, 1000.0, 500.0}; - ls.latencies = {1, 1, 1}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == false); - } - SUBCASE("test throughput stable") - { - ls.infer_per_sec = {500.0, 520.0, 510.0}; - ls.latencies = {1, 1, 1}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true); - } - SUBCASE("test latency not stable") - { - ls.infer_per_sec = {500.0, 520.0, 510.0}; - ls.latencies = {100, 106, 112}; - lp.stability_window = 3; 
- lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == false); - } - SUBCASE("test latency stable") - { - ls.infer_per_sec = {500.0, 520.0, 510.0}; - ls.latencies = {100, 104, 108}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true); - } - SUBCASE("test throughput stable after many measurements") - { - ls.infer_per_sec = {1.0, 1000.0, 500.0, 1500.0, 500.0, 520.0, 510.0}; - ls.latencies = {1, 1, 1, 1, 1, 1, 1}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true); - } - SUBCASE("test stability window of 5") - { - ls.infer_per_sec = {500.0, 520.0, 510.0, 505.0, 515.0}; - ls.latencies = {100, 104, 108, 102, 106}; - lp.stability_window = 5; - lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true); - } - SUBCASE("test not stable in 5 but stable in 3") - { - ls.infer_per_sec = {1.0, 1000.0, 510.0, 505.0, 515.0}; - ls.latencies = {100, 104, 108, 102, 106}; - lp.stability_window = 5; - lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == false); - } - SUBCASE("test stability window of 2") - { - ls.infer_per_sec = {500.0, 1000.0, 1.0, 505.0, 515.0}; - ls.latencies = {100, 104, 108, 102, 106}; - lp.stability_window = 2; - lp.stability_threshold = 0.1; - CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true); - } -} - -TEST_CASE("test check within threshold") -{ - LoadStatus ls; - LoadParams lp; - - ls.infer_per_sec = {500.0, 520.0, 510.0}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - uint64_t latency_threshold_ms = 1; - - SUBCASE("test not within threshold") - { - ls.latencies = {2000000, 2000000, 2000000}; - CHECK( - TestInferenceProfiler::TestCheckWithinThreshold( - ls, lp, latency_threshold_ms) == false); - } - - SUBCASE("test within threshold") - { - ls.latencies = {100000, 100000, 100000}; - CHECK( - TestInferenceProfiler::TestCheckWithinThreshold( - ls, lp, latency_threshold_ms) == true); - } -} - -TEST_CASE("test_determine_stability") -{ - LoadStatus ls; - LoadParams lp; - - SUBCASE("test inference equals zero") - { - ls.infer_per_sec = {500.0, 0.0, 510.0}; - ls.latencies = {1, 1, 1}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - uint64_t latency_threshold_ms = 1; - CHECK(TestInferenceProfiler::TestDetermineStability(ls, lp) == false); - - ls.infer_per_sec = {500.0, 520.0, 510.0}; - CHECK(TestInferenceProfiler::TestDetermineStability(ls, lp) == true); - } - - SUBCASE("test determine stability without latency check") - { - ls.infer_per_sec = {500.0, 520.0, 510.0}; - ls.latencies = {100, 106, 112}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - uint64_t latency_threshold_ms = 1; - CHECK(TestInferenceProfiler::TestDetermineStability(ls, lp, false) == true); - } -} - -TEST_CASE("test_is_done_profiling") -{ - LoadStatus ls; - LoadParams lp; - - - SUBCASE("test latency_threshold is NO_LIMIT") - { - ls.infer_per_sec = {1.0, 1000.0, 500.0}; - ls.latencies = {1, 1, 1}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - uint64_t latency_threshold_ms = NO_LIMIT; - - CHECK( - TestInferenceProfiler::TestIsDoneProfiling( - ls, lp, latency_threshold_ms) == false); - } - - SUBCASE("test not within threshold from done profiling") - { - ls.infer_per_sec = {1.0, 1000.0, 500.0}; - ls.latencies = {2000000, 2000000, 2000000}; - 
lp.stability_window = 3; - lp.stability_threshold = 0.1; - uint64_t latency_threshold_ms = 1; - CHECK( - TestInferenceProfiler::TestIsDoneProfiling( - ls, lp, latency_threshold_ms) == true); - } - - SUBCASE("test stability from is done profiling") - { - ls.infer_per_sec = {1.0, 1000.0, 500.0}; - ls.latencies = {1, 1, 1}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - uint64_t latency_threshold_ms = 1; - - CHECK( - TestInferenceProfiler::TestIsDoneProfiling( - ls, lp, latency_threshold_ms) == false); - ls.infer_per_sec = {500.0, 520.0, 510.0}; - - CHECK( - TestInferenceProfiler::TestIsDoneProfiling( - ls, lp, latency_threshold_ms) == true); - } - - SUBCASE("test underflow") - { - ls.infer_per_sec = {500.0, 510.0}; - ls.latencies = {1, 1}; - lp.stability_window = 3; - lp.stability_threshold = 0.1; - uint64_t latency_threshold_ms = 1; - CHECK( - TestInferenceProfiler::TestIsDoneProfiling( - ls, lp, latency_threshold_ms) == false); - } -} - -TEST_CASE("test mocking") -{ - using testing::AtLeast; - using testing::Return; - MockInferenceProfiler mip; - - EXPECT_CALL(mip, IncludeServerStats()) - .Times(AtLeast(1)) - .WillOnce(Return(false)); - - CHECK(mip.IncludeServerStats() == false); -} - -TEST_CASE("testing the GetMeanAndStdDev function") -{ - uint64_t avg_latency_ns{0}; - uint64_t std_dev_latency_us{0}; - - SUBCASE("calculation using small latencies") - { - std::vector latencies{100000, 200000, 50000}; - std::tie(avg_latency_ns, std_dev_latency_us) = - TestInferenceProfiler::GetMeanAndStdDev(latencies); - CHECK(avg_latency_ns == 116666); - CHECK(std_dev_latency_us == 76); - } - - SUBCASE("calculation using big latencies") - { - // Squaring these would exceed UINT64_MAX. - std::vector latencies{4300000000, 4400000000, 5000000000}; - std::tie(avg_latency_ns, std_dev_latency_us) = - TestInferenceProfiler::GetMeanAndStdDev(latencies); - CHECK(avg_latency_ns == 4566666666); - CHECK(std_dev_latency_us == 378593); - } - - SUBCASE("calculation using one latency") - { - // Edge case should set standard deviation to near infinity - std::vector latencies{100}; - std::tie(avg_latency_ns, std_dev_latency_us) = - TestInferenceProfiler::GetMeanAndStdDev(latencies); - CHECK(avg_latency_ns == 100); - CHECK(std_dev_latency_us == UINT64_MAX); - } -} - -TEST_CASE("testing the MergeMetrics function") -{ - TestInferenceProfiler tip{}; - Metrics metrics_1{}, metrics_2{}, merged_metrics{}; - - SUBCASE("all metrics present") - { - metrics_1.gpu_utilization_per_gpu["gpu0"] = 0.45; - metrics_2.gpu_utilization_per_gpu["gpu0"] = 0.52; - - metrics_1.gpu_power_usage_per_gpu["gpu0"] = 70.0; - metrics_2.gpu_power_usage_per_gpu["gpu0"] = 84.5; - - metrics_1.gpu_memory_used_bytes_per_gpu["gpu0"] = 10000; - metrics_2.gpu_memory_used_bytes_per_gpu["gpu0"] = 12000; - - metrics_1.gpu_memory_total_bytes_per_gpu["gpu0"] = 100000; - metrics_2.gpu_memory_total_bytes_per_gpu["gpu0"] = 100000; - - const std::vector> all_metrics{ - metrics_1, metrics_2}; - - tip.MergeMetrics(all_metrics, merged_metrics); - CHECK(merged_metrics.gpu_utilization_per_gpu.size() == 1); - CHECK(merged_metrics.gpu_power_usage_per_gpu.size() == 1); - CHECK(merged_metrics.gpu_memory_used_bytes_per_gpu.size() == 1); - CHECK(merged_metrics.gpu_memory_total_bytes_per_gpu.size() == 1); - CHECK( - merged_metrics.gpu_utilization_per_gpu["gpu0"] == - doctest::Approx(0.485)); - CHECK( - merged_metrics.gpu_power_usage_per_gpu["gpu0"] == - doctest::Approx(77.25)); - CHECK(merged_metrics.gpu_memory_used_bytes_per_gpu["gpu0"] == 12000); - 
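The "calculation using big latencies" subcase above notes that squaring nanosecond values such as 4300000000 would overflow uint64_t. A sketch that sidesteps this by working in double precision and in microseconds, and that reproduces the expected values (mean 4566666666 ns, sample standard deviation 378593 us); this is an illustration, not the original implementation, and it omits the single-sample UINT64_MAX edge case the test also covers:

#include <cmath>
#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical sketch: mean in nanoseconds, sample std dev in microseconds.
std::pair<uint64_t, uint64_t> MeanAndStdDev(const std::vector<uint64_t>& latencies_ns)
{
  double sum_us = 0.0, sum_sq_us = 0.0;
  for (const uint64_t v : latencies_ns) {
    const double us = static_cast<double>(v) / 1000.0;  // convert to microseconds
    sum_us += us;
    sum_sq_us += us * us;                               // no integer overflow possible
  }
  const double n = static_cast<double>(latencies_ns.size());
  const double mean_us = sum_us / n;
  const double sample_var_us = (sum_sq_us - n * mean_us * mean_us) / (n - 1.0);
  return {static_cast<uint64_t>(mean_us * 1000.0),          // mean back in nanoseconds
          static_cast<uint64_t>(std::sqrt(sample_var_us))}; // std dev in microseconds
}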
CHECK(merged_metrics.gpu_memory_total_bytes_per_gpu["gpu0"] == 100000); - } - - SUBCASE("missing multiple metrics") - { - metrics_1.gpu_utilization_per_gpu["gpu0"] = 0.45; - metrics_2.gpu_utilization_per_gpu["gpu0"] = 0.52; - - metrics_1.gpu_memory_used_bytes_per_gpu["gpu0"] = 10000; - metrics_2.gpu_memory_used_bytes_per_gpu["gpu0"] = 12000; - - const std::vector> all_metrics{ - metrics_1, metrics_2}; - - tip.MergeMetrics(all_metrics, merged_metrics); - CHECK(merged_metrics.gpu_utilization_per_gpu.size() == 1); - CHECK(merged_metrics.gpu_power_usage_per_gpu.size() == 0); - CHECK(merged_metrics.gpu_memory_used_bytes_per_gpu.size() == 1); - CHECK(merged_metrics.gpu_memory_total_bytes_per_gpu.size() == 0); - CHECK( - merged_metrics.gpu_utilization_per_gpu["gpu0"] == - doctest::Approx(0.485)); - CHECK(merged_metrics.gpu_memory_used_bytes_per_gpu["gpu0"] == 12000); - } -} - -TEST_CASE("testing the GetMetricAveragePerGPU function") -{ - TestInferenceProfiler tip{}; - std::map metric_averages{}; - - SUBCASE("all GPUs present") - { - const std::map metric_1{ - {"gpu0", 0.45}, {"gpu1", 0.23}}, - metric_2{{"gpu0", 0.52}, {"gpu1", 0.27}}, - metric_3{{"gpu0", 0.56}, {"gpu1", 0.30}}; - - const std::vector< - std::reference_wrapper>> - all_metrics{metric_1, metric_2, metric_3}; - - tip.GetMetricAveragePerGPU(all_metrics, metric_averages); - - CHECK(metric_averages.size() == 2); - CHECK(metric_averages["gpu0"] == doctest::Approx(0.51)); - CHECK(metric_averages["gpu1"] == doctest::Approx(0.26666)); - } - - SUBCASE("missing one GPU from one metric") - { - const std::map metric_1{ - {"gpu0", 0.45}, {"gpu1", 0.23}}, - metric_2{{"gpu0", 0.52}}, metric_3{{"gpu0", 0.56}, {"gpu1", 0.30}}; - - const std::vector< - std::reference_wrapper>> - all_metrics{metric_1, metric_2, metric_3}; - - tip.GetMetricAveragePerGPU(all_metrics, metric_averages); - - CHECK(metric_averages.size() == 2); - CHECK(metric_averages["gpu0"] == doctest::Approx(0.51)); - CHECK(metric_averages["gpu1"] == doctest::Approx(0.265)); - } -} - -TEST_CASE("testing the GetMetricMaxPerGPU function") -{ - TestInferenceProfiler tip{}; - std::map metric_maxes{}; - - SUBCASE("all GPUs present") - { - const std::map metric_1{{"gpu0", 10}, {"gpu1", 55}}, - metric_2{{"gpu0", 12}, {"gpu1", 84}}, - metric_3{{"gpu0", 15}, {"gpu1", 47}}; - - const std::vector< - std::reference_wrapper>> - all_metrics{metric_1, metric_2, metric_3}; - - tip.GetMetricMaxPerGPU(all_metrics, metric_maxes); - - CHECK(metric_maxes.size() == 2); - CHECK(metric_maxes["gpu0"] == 15); - CHECK(metric_maxes["gpu1"] == 84); - } - - SUBCASE("missing one GPU from one metric") - { - const std::map metric_1{{"gpu0", 10}, {"gpu1", 55}}, - metric_2{{"gpu0", 12}}, metric_3{{"gpu0", 15}, {"gpu1", 47}}; - - const std::vector< - std::reference_wrapper>> - all_metrics{metric_1, metric_2, metric_3}; - - tip.GetMetricMaxPerGPU(all_metrics, metric_maxes); - - CHECK(metric_maxes.size() == 2); - CHECK(metric_maxes["gpu0"] == 15); - CHECK(metric_maxes["gpu1"] == 55); - } -} - -TEST_CASE("testing the GetMetricFirstPerGPU function") -{ - TestInferenceProfiler tip{}; - std::map metric_firsts{}; - - SUBCASE("all GPUs present") - { - const std::map metric_1{{"gpu0", 10}, {"gpu1", 55}}, - metric_2{{"gpu0", 12}, {"gpu1", 84}}, - metric_3{{"gpu0", 15}, {"gpu1", 47}}; - - const std::vector< - std::reference_wrapper>> - all_metrics{metric_1, metric_2, metric_3}; - - tip.GetMetricFirstPerGPU(all_metrics, metric_firsts); - - CHECK(metric_firsts.size() == 2); - CHECK(metric_firsts["gpu0"] == 10); - 
CHECK(metric_firsts["gpu1"] == 55); - } - - SUBCASE("missing one GPU from one metric") - { - const std::map metric_1{{"gpu0", 10}}, - metric_2{{"gpu0", 12}, {"gpu1", 84}}, - metric_3{{"gpu0", 15}, {"gpu1", 47}}; - - const std::vector< - std::reference_wrapper>> - all_metrics{metric_1, metric_2, metric_3}; - - tip.GetMetricFirstPerGPU(all_metrics, metric_firsts); - - CHECK(metric_firsts.size() == 2); - CHECK(metric_firsts["gpu0"] == 10); - CHECK(metric_firsts["gpu1"] == 84); - } -} - -TEST_CASE("test the ReportPrometheusMetrics function") -{ - Metrics metrics{}; - std::stringstream captured_cout; - std::streambuf* old_cout{std::cout.rdbuf(captured_cout.rdbuf())}; - - SUBCASE("regular output") - { - metrics.gpu_utilization_per_gpu["gpu0"] = 0.45; - metrics.gpu_utilization_per_gpu["gpu1"] = 0.52; - - metrics.gpu_power_usage_per_gpu["gpu0"] = 70.0; - metrics.gpu_power_usage_per_gpu["gpu1"] = 84.5; - - metrics.gpu_memory_used_bytes_per_gpu["gpu0"] = 10000; - metrics.gpu_memory_used_bytes_per_gpu["gpu1"] = 12000; - - metrics.gpu_memory_total_bytes_per_gpu["gpu0"] = 100000; - metrics.gpu_memory_total_bytes_per_gpu["gpu1"] = 100000; - - cb::Error result{ReportPrometheusMetrics(metrics)}; - - std::cout.rdbuf(old_cout); - - CHECK(result.Err() == SUCCESS); - CHECK( - captured_cout.str() == - " Avg GPU Utilization:\n" - " gpu0 : 45%\n" - " gpu1 : 52%\n" - " Avg GPU Power Usage:\n" - " gpu0 : 70 watts\n" - " gpu1 : 84.5 watts\n" - " Max GPU Memory Usage:\n" - " gpu0 : 10000 bytes\n" - " gpu1 : 12000 bytes\n" - " Total GPU Memory:\n" - " gpu0 : 100000 bytes\n" - " gpu1 : 100000 bytes\n"); - } - - SUBCASE("too many GPUs") - { - const size_t num_gpus{17}; - for (size_t gpu_idx{0}; gpu_idx < num_gpus; gpu_idx++) { - const auto& gpu_key{"gpu" + std::to_string(gpu_idx)}; - metrics.gpu_utilization_per_gpu[gpu_key] = 0.5; - metrics.gpu_power_usage_per_gpu[gpu_key] = 75.5; - metrics.gpu_memory_used_bytes_per_gpu[gpu_key] = 12500; - metrics.gpu_memory_total_bytes_per_gpu[gpu_key] = 150000; - } - - cb::Error result{ReportPrometheusMetrics(metrics)}; - - std::cout.rdbuf(old_cout); - - CHECK(result.Err() == SUCCESS); - CHECK( - captured_cout.str() == - "Too many GPUs on system to print out individual Prometheus metrics, " - "use the CSV output feature to see metrics.\n"); - } -} - -TEST_CASE("InferenceProfiler: Test SummarizeOverhead") -{ - TestInferenceProfiler tip{}; - PerfStatus status; - SUBCASE("normal") - { - tip.SummarizeOverhead(100, 63, status); - CHECK(status.overhead_pct == doctest::Approx(37)); - } - SUBCASE("normal 2") - { - tip.SummarizeOverhead(234, 56, status); - CHECK(status.overhead_pct == doctest::Approx(76.068)); - } - SUBCASE("overflow") - { - tip.SummarizeOverhead(100, 101, status); - CHECK(status.overhead_pct == doctest::Approx(0)); - } -} - -TEST_CASE( - "summarize_send_request_rate: testing the SummarizeSendRequestRate " - "function") -{ - TestInferenceProfiler tip{}; - PerfStatus perf_status; - - SUBCASE("invalid zero window duration") - { - double window_duration_s{0.0}; - size_t num_sent_requests{0}; - CHECK_THROWS_WITH_AS( - tip.SummarizeSendRequestRate( - window_duration_s, num_sent_requests, perf_status), - "window_duration_s must be positive", std::runtime_error); - } - - SUBCASE("invalid negative window duration") - { - double window_duration_s{-1.0}; - size_t num_sent_requests{0}; - CHECK_THROWS_WITH_AS( - tip.SummarizeSendRequestRate( - window_duration_s, num_sent_requests, perf_status), - "window_duration_s must be positive", std::runtime_error); - } - - SUBCASE("regular case") - 
{ - double window_duration_s{2.0}; - size_t num_sent_requests{100}; - tip.SummarizeSendRequestRate( - window_duration_s, num_sent_requests, perf_status); - CHECK(perf_status.send_request_rate == doctest::Approx(50)); - } -} - -TEST_CASE("determine_stats_model_version: testing DetermineStatsModelVersion()") -{ - TestInferenceProfiler tip{}; - cb::ModelIdentifier model_identifier; - cb::ModelStatistics old_stats; - cb::ModelStatistics new_stats; - old_stats.queue_count_ = 1; - new_stats.queue_count_ = 2; - - int64_t expected_model_version; - bool expect_warning = false; - bool expect_exception = false; - - std::map start_stats_map; - std::map end_stats_map; - - SUBCASE("One entry - unspecified - valid and in start") - { - model_identifier = {"ModelA", ""}; - start_stats_map.insert({{"ModelA", "3"}, old_stats}); - end_stats_map.insert({{"ModelA", "3"}, new_stats}); - expected_model_version = 3; - } - SUBCASE("One entry - unspecified - valid and not in start") - { - model_identifier = {"ModelA", ""}; - end_stats_map.insert({{"ModelA", "3"}, new_stats}); - expected_model_version = 3; - } - SUBCASE("One entry - unspecified - invalid") - { - model_identifier = {"ModelA", ""}; - start_stats_map.insert({{"ModelA", "3"}, old_stats}); - end_stats_map.insert({{"ModelA", "3"}, old_stats}); - expect_exception = true; - expected_model_version = -1; - } - SUBCASE("One entry - match") - { - model_identifier = {"ModelA", "3"}; - end_stats_map.insert({{"ModelA", "3"}, new_stats}); - expected_model_version = 3; - } - SUBCASE("One entry - miss") - { - model_identifier = {"ModelA", "2"}; - end_stats_map.insert({{"ModelA", "3"}, new_stats}); - expect_exception = true; - expected_model_version = -1; - } - SUBCASE("Two entries - unspecified case 1") - { - model_identifier = {"ModelA", ""}; - start_stats_map.insert({{"ModelA", "3"}, old_stats}); - start_stats_map.insert({{"ModelA", "4"}, old_stats}); - end_stats_map.insert({{"ModelA", "3"}, new_stats}); - end_stats_map.insert({{"ModelA", "4"}, old_stats}); - expected_model_version = 3; - } - SUBCASE("Two entries - unspecified case 2") - { - model_identifier = {"ModelA", ""}; - start_stats_map.insert({{"ModelA", "3"}, old_stats}); - start_stats_map.insert({{"ModelA", "4"}, old_stats}); - end_stats_map.insert({{"ModelA", "3"}, old_stats}); - end_stats_map.insert({{"ModelA", "4"}, new_stats}); - expected_model_version = 4; - } - SUBCASE("Two entries - unspecified case 3") - { - model_identifier = {"ModelA", ""}; - start_stats_map.insert({{"ModelA", "3"}, old_stats}); - start_stats_map.insert({{"ModelA", "4"}, old_stats}); - end_stats_map.insert({{"ModelA", "3"}, new_stats}); - end_stats_map.insert({{"ModelA", "4"}, new_stats}); - expected_model_version = 4; - expect_warning = 1; - } - SUBCASE("Two entries - specified hit") - { - model_identifier = {"ModelA", "3"}; - end_stats_map.insert({{"ModelA", "3"}, old_stats}); - end_stats_map.insert({{"ModelA", "4"}, old_stats}); - expected_model_version = 3; - } - SUBCASE("Two entries - specified miss") - { - model_identifier = {"ModelA", "2"}; - end_stats_map.insert({{"ModelA", "3"}, old_stats}); - end_stats_map.insert({{"ModelA", "4"}, old_stats}); - expected_model_version = -1; - expect_exception = true; - } - - SUBCASE("One entry - version -1 - valid and in start") - { - model_identifier = {"ModelA", "-1"}; - start_stats_map.insert({{"ModelA", "3"}, old_stats}); - end_stats_map.insert({{"ModelA", "3"}, new_stats}); - cb::Error status = tip.SetTopLevelResponseCaching(true); - CHECK(status.IsOk()); - expected_model_version = 
-1; - } - - SUBCASE("One entry - version -1 - not valid") - { - model_identifier = {"ModelA", "-1"}; - end_stats_map.insert({{"ModelA", "3"}, old_stats}); - cb::Error status = tip.SetTopLevelResponseCaching(false); - CHECK(status.IsOk()); - expected_model_version = -1; - expect_exception = true; - } - - std::stringstream captured_cerr; - std::streambuf* old = std::cerr.rdbuf(captured_cerr.rdbuf()); - - int64_t result_model_version; - cb::Error result; - result = tip.DetermineStatsModelVersion( - model_identifier, start_stats_map, end_stats_map, &result_model_version); - - CHECK(result_model_version == expected_model_version); - CHECK(result.IsOk() != expect_exception); - CHECK(captured_cerr.str().empty() != expect_warning); - - std::cerr.rdbuf(old); -} - -TEST_CASE( - "valid_latency_measurement: testing the ValidLatencyMeasurement function") -{ - MockInferenceProfiler mock_inference_profiler{}; - - SUBCASE("testing logic relevant to response throughput metric") - { - auto clock_epoch{std::chrono::time_point()}; - - auto request1_timestamp{clock_epoch + std::chrono::nanoseconds(1)}; - auto response1_timestamp{clock_epoch + std::chrono::nanoseconds(2)}; - auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(3)}; - auto request_record1{RequestRecord( - request1_timestamp, - std::vector>{ - response1_timestamp, response2_timestamp}, - {}, {}, 0, false, 0, false)}; - - auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(4)}; - RequestRecord request_record2{}; - size_t expected_response_count{0}; - - SUBCASE("second request has three data responses") - { - auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; - auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; - auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)}; - request_record2 = RequestRecord( - request2_timestamp, - std::vector>{ - response3_timestamp, response4_timestamp, response5_timestamp}, - {}, {}, 0, false, 0, false); - expected_response_count = 5; - } - SUBCASE("second request has two data responses and one null response") - { - auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; - auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; - auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)}; - request_record2 = RequestRecord( - request2_timestamp, - std::vector>{ - response3_timestamp, response4_timestamp, response5_timestamp}, - {}, {}, 0, false, 0, true); - expected_response_count = 4; - } - SUBCASE("second request has one null response") - { - request_record2 = RequestRecord( - request2_timestamp, - std::vector>{}, {}, - {}, 0, false, 0, true); - expected_response_count = 2; - } - - mock_inference_profiler.all_request_records_ = { - request_record1, request_record2}; - - const std::pair valid_range{ - std::make_pair(0, UINT64_MAX)}; - size_t valid_sequence_count{0}; - size_t delayed_request_count{0}; - std::vector valid_latencies{}; - size_t response_count{0}; - std::vector valid_requests{}; - - mock_inference_profiler.ValidLatencyMeasurement( - valid_range, valid_sequence_count, delayed_request_count, - &valid_latencies, response_count, valid_requests); - - CHECK(response_count == expected_response_count); - } - SUBCASE("testing logic relevant to valid request output") - { - auto clock_epoch{std::chrono::time_point()}; - - auto request1_timestamp{clock_epoch + std::chrono::nanoseconds(1)}; - auto response1_timestamp{clock_epoch + std::chrono::nanoseconds(2)}; - auto request_record1{RequestRecord( - 
request1_timestamp, - std::vector>{ - response1_timestamp}, - {}, {}, 0, false, 0, false)}; - - auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(3)}; - auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(4)}; - auto request_record2{RequestRecord( - request2_timestamp, - std::vector>{ - response2_timestamp}, - {}, {}, 0, false, 0, false)}; - - auto request3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; - auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; - auto request_record3{RequestRecord( - request3_timestamp, - std::vector>{ - response3_timestamp}, - {}, {}, 0, false, 0, false)}; - - mock_inference_profiler.all_request_records_ = { - request_record1, request_record2, request_record3}; - - const std::pair valid_range{std::make_pair(0, 4)}; - size_t valid_sequence_count{0}; - size_t delayed_request_count{0}; - std::vector valid_latencies{}; - size_t response_count{0}; - std::vector valid_requests{}; - - mock_inference_profiler.ValidLatencyMeasurement( - valid_range, valid_sequence_count, delayed_request_count, - &valid_latencies, response_count, valid_requests); - - CHECK(valid_requests.size() == 2); - CHECK(valid_requests[0].start_time_ == request1_timestamp); - CHECK(valid_requests[1].start_time_ == request2_timestamp); - } -} - -TEST_CASE( - "merge_perf_status_reports: testing the MergePerfStatusReports function") -{ - MockInferenceProfiler mock_inference_profiler{}; - - SUBCASE("testing logic relevant to response throughput metric") - { - PerfStatus perf_status1{}; - perf_status1.client_stats.response_count = 8; - perf_status1.client_stats.duration_ns = 2000000000; - - PerfStatus perf_status2{}; - perf_status2.client_stats.response_count = 10; - perf_status2.client_stats.duration_ns = 4000000000; - - std::deque perf_status{perf_status1, perf_status2}; - PerfStatus summary_status{}; - - cb::Error error{}; - - EXPECT_CALL( - mock_inference_profiler, MergeServerSideStats(testing::_, testing::_)) - .WillOnce(testing::Return(cb::Error::Success)); - EXPECT_CALL( - mock_inference_profiler, SummarizeLatency(testing::_, testing::_)) - .WillOnce(testing::Return(cb::Error::Success)); - - error = mock_inference_profiler.MergePerfStatusReports( - perf_status, summary_status); - - REQUIRE(error.IsOk() == true); - CHECK(summary_status.client_stats.response_count == 18); - CHECK( - summary_status.client_stats.responses_per_sec == doctest::Approx(3.0)); - } -} - -TEST_CASE("clamp window") -{ - TestInferenceProfiler tip{}; - std::vector reqs{}; - - auto clock_epoch{std::chrono::time_point()}; - - auto request1_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; - auto response1_timestamp{clock_epoch + std::chrono::nanoseconds(20)}; - - reqs.emplace_back( - request1_timestamp, - std::vector>{ - response1_timestamp}); - - auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(3)}; - auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(15)}; - reqs.emplace_back( - request2_timestamp, - std::vector>{ - response2_timestamp}); - - auto request3_timestamp{clock_epoch + std::chrono::nanoseconds(7)}; - auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(17)}; - reqs.emplace_back( - request3_timestamp, - std::vector>{ - response3_timestamp}); - - auto window = tip.ClampWindow(reqs); - - CHECK(window.first == 3); - CHECK(window.second == 20); -} - -TEST_CASE("summarize_client_stat: testing the SummarizeClientStat function") -{ - MockInferenceProfiler mock_inference_profiler{}; - - SUBCASE("testing logic relevant to response 
throughput metric") - { - mock_inference_profiler.parser_ = std::make_shared(); - mock_inference_profiler.manager_ = std::make_unique(); - - const cb::InferStat start_stat{}; - const cb::InferStat end_stat{}; - const uint64_t duration_ns{2000000000}; - const size_t valid_request_count{0}; - const size_t delayed_request_count{0}; - const size_t valid_sequence_count{0}; - const size_t response_count{8}; - PerfStatus summary{}; - - cb::Error error{}; - - error = mock_inference_profiler.SummarizeClientStat( - start_stat, end_stat, duration_ns, valid_request_count, - delayed_request_count, valid_sequence_count, response_count, summary); - - REQUIRE(error.IsOk() == true); - CHECK(summary.client_stats.response_count == 8); - CHECK(summary.client_stats.responses_per_sec == doctest::Approx(4.0)); - } -} -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_load_manager.cc b/src/c++/perf_analyzer/test_load_manager.cc deleted file mode 100644 index 3908374ed..000000000 --- a/src/c++/perf_analyzer/test_load_manager.cc +++ /dev/null @@ -1,460 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "command_line_parser.h" -#include "doctest.h" -#include "load_manager.h" -#include "test_load_manager_base.h" - -namespace cb = triton::perfanalyzer::clientbackend; - -namespace triton { namespace perfanalyzer { - -namespace { - -bool -operator==(const RequestRecord& lhs, const RequestRecord& rhs) -{ - return std::tie( - lhs.start_time_, lhs.response_timestamps_, lhs.request_inputs_, - lhs.response_outputs_, lhs.sequence_end_, lhs.delayed_, - lhs.sequence_id_, lhs.has_null_last_response_) == - std::tie( - rhs.start_time_, rhs.response_timestamps_, rhs.request_inputs_, - rhs.response_outputs_, rhs.sequence_end_, rhs.delayed_, - rhs.sequence_id_, rhs.has_null_last_response_); -} - -} // namespace - -class TestLoadManager : public TestLoadManagerBase, public LoadManager { - public: - ~TestLoadManager() = default; - TestLoadManager( - PerfAnalyzerParameters params, bool is_sequence_model = false, - bool is_decoupled_model = false) - : TestLoadManagerBase(params, is_sequence_model, is_decoupled_model), - LoadManager( - params.async, params.streaming, params.batch_size, - params.max_threads, params.shared_memory_type, - params.output_shm_size, GetParser(), GetFactory(), - params.request_parameters) - { - } - - std::vector>& threads_stat_{ - LoadManager::threads_stat_}; - - /// Test the public function CheckHealth - /// - /// It will return a bad result if any of the thread stats - /// have a bad status or cb_status - /// - void TestCheckHealth() - { - auto good = std::make_shared(); - good->status_ = cb::Error::Success; - good->cb_status_ = cb::Error::Success; - - auto bad_status = std::make_shared(); - bad_status->status_ = cb::Error::Failure; - bad_status->cb_status_ = cb::Error::Success; - - auto bad_cb_status = std::make_shared(); - bad_cb_status->status_ = cb::Error::Success; - bad_cb_status->cb_status_ = cb::Error::Failure; - - threads_stat_.clear(); - bool expect_ok = true; - - SUBCASE("Empty") - { - expect_ok = true; - } - SUBCASE("Good") - { - // Good entries: expect OK - threads_stat_.push_back(good); - threads_stat_.push_back(good); - expect_ok = true; - } - SUBCASE("BadStatus") - { - // Bad Status: expect not OK - threads_stat_.push_back(good); - threads_stat_.push_back(bad_status); - expect_ok = false; - } - SUBCASE("BadCbStatus") - { - // Bad cb_Status: expect not OK - threads_stat_.push_back(bad_cb_status); - threads_stat_.push_back(good); - expect_ok = false; - } - SUBCASE("BadBothStatus") - { - threads_stat_.push_back(bad_status); - threads_stat_.push_back(good); - threads_stat_.push_back(bad_cb_status); - expect_ok = false; - } - - CHECK(CheckHealth().IsOk() == expect_ok); - } - - /// Test the public function SwapRequestRecords - /// - /// It will gather all request records from the thread_stats - /// and return them, and clear the thread_stats request records - /// - void TestSwapRequestRecords() - { - using time_point = std::chrono::time_point; - using ns = std::chrono::nanoseconds; - auto request_record1 = RequestRecord( - time_point(ns(1)), std::vector{time_point(ns(2))}, {}, {}, - 0, false, 0, false); - auto request_record2 = RequestRecord( - time_point(ns(3)), std::vector{time_point(ns(4))}, {}, {}, - 0, false, 0, false); - auto request_record3 = RequestRecord( - time_point(ns(5)), std::vector{time_point(ns(6))}, {}, {}, - 0, false, 0, false); - - std::vector source_request_records; - - SUBCASE("No threads") - { - auto ret = SwapRequestRecords(source_request_records); - CHECK(source_request_records.size() == 0); - CHECK(ret.IsOk() == true); - } - 
SUBCASE("Source has request records") - { - // Any request records in the vector passed in to SwapRequestRecords will - // be dropped on the floor - // - source_request_records.push_back(request_record1); - auto ret = SwapRequestRecords(source_request_records); - CHECK(source_request_records.size() == 0); - CHECK(ret.IsOk() == true); - } - SUBCASE("One thread") - { - auto stat1 = std::make_shared(); - stat1->request_records_.push_back(request_record1); - stat1->request_records_.push_back(request_record2); - stat1->request_records_.push_back(request_record3); - threads_stat_.push_back(stat1); - - CHECK(stat1->request_records_.size() == 3); - auto ret = SwapRequestRecords(source_request_records); - CHECK(stat1->request_records_.size() == 0); - - REQUIRE(source_request_records.size() == 3); - CHECK(source_request_records[0] == request_record1); - CHECK(source_request_records[1] == request_record2); - CHECK(source_request_records[2] == request_record3); - CHECK(ret.IsOk() == true); - } - SUBCASE("Multiple threads") - { - auto stat1 = std::make_shared(); - stat1->request_records_.push_back(request_record2); - - auto stat2 = std::make_shared(); - stat2->request_records_.push_back(request_record1); - stat2->request_records_.push_back(request_record3); - - threads_stat_.push_back(stat1); - threads_stat_.push_back(stat2); - - CHECK(stat1->request_records_.size() == 1); - CHECK(stat2->request_records_.size() == 2); - auto ret = SwapRequestRecords(source_request_records); - CHECK(stat1->request_records_.size() == 0); - CHECK(stat2->request_records_.size() == 0); - - REQUIRE(source_request_records.size() == 3); - CHECK(source_request_records[0] == request_record2); - CHECK(source_request_records[1] == request_record1); - CHECK(source_request_records[2] == request_record3); - CHECK(ret.IsOk() == true); - } - } - - /// Test the public function GetAccumulatedClientStat - /// - /// It will accumulate all contexts_stat data from all threads_stat - /// - void TestGetAccumulatedClientStat() - { - cb::InferStat result_stat; - - SUBCASE("No threads") - { - auto ret = GetAccumulatedClientStat(&result_stat); - CHECK(result_stat.completed_request_count == 0); - CHECK(result_stat.cumulative_total_request_time_ns == 0); - CHECK(result_stat.cumulative_send_time_ns == 0); - CHECK(result_stat.cumulative_receive_time_ns == 0); - CHECK(ret.IsOk() == true); - } - SUBCASE("One thread one context stat") - { - auto stat1 = std::make_shared(); - stat1->contexts_stat_.push_back(cb::InferStat()); - stat1->contexts_stat_[0].completed_request_count = 2; - stat1->contexts_stat_[0].cumulative_total_request_time_ns = 3; - stat1->contexts_stat_[0].cumulative_send_time_ns = 4; - stat1->contexts_stat_[0].cumulative_receive_time_ns = 5; - threads_stat_.push_back(stat1); - - auto ret = GetAccumulatedClientStat(&result_stat); - CHECK(result_stat.completed_request_count == 2); - CHECK(result_stat.cumulative_total_request_time_ns == 3); - CHECK(result_stat.cumulative_send_time_ns == 4); - CHECK(result_stat.cumulative_receive_time_ns == 5); - CHECK(ret.IsOk() == true); - } - SUBCASE("Multiple thread multiple contexts") - { - auto stat1 = std::make_shared(); - stat1->contexts_stat_.push_back(cb::InferStat()); - stat1->contexts_stat_.push_back(cb::InferStat()); - stat1->contexts_stat_[0].completed_request_count = 2; - stat1->contexts_stat_[0].cumulative_total_request_time_ns = 3; - stat1->contexts_stat_[0].cumulative_send_time_ns = 4; - stat1->contexts_stat_[0].cumulative_receive_time_ns = 5; - stat1->contexts_stat_[1].completed_request_count 
= 3; - stat1->contexts_stat_[1].cumulative_total_request_time_ns = 4; - stat1->contexts_stat_[1].cumulative_send_time_ns = 5; - stat1->contexts_stat_[1].cumulative_receive_time_ns = 6; - threads_stat_.push_back(stat1); - - auto stat2 = std::make_shared(); - stat2->contexts_stat_.push_back(cb::InferStat()); - stat2->contexts_stat_.push_back(cb::InferStat()); - stat2->contexts_stat_[0].completed_request_count = 7; - stat2->contexts_stat_[0].cumulative_total_request_time_ns = 8; - stat2->contexts_stat_[0].cumulative_send_time_ns = 9; - stat2->contexts_stat_[0].cumulative_receive_time_ns = 10; - stat2->contexts_stat_[1].completed_request_count = 11; - stat2->contexts_stat_[1].cumulative_total_request_time_ns = 12; - stat2->contexts_stat_[1].cumulative_send_time_ns = 13; - stat2->contexts_stat_[1].cumulative_receive_time_ns = 14; - threads_stat_.push_back(stat2); - - auto ret = GetAccumulatedClientStat(&result_stat); - // 2 + 3 + 7 + 11 - // - CHECK(result_stat.completed_request_count == 23); - // 3 + 4 + 8 + 12 - // - CHECK(result_stat.cumulative_total_request_time_ns == 27); - // 4 + 5 + 9 + 13 - // - CHECK(result_stat.cumulative_send_time_ns == 31); - // 5 + 6 + 10 + 14 - // - CHECK(result_stat.cumulative_receive_time_ns == 35); - - CHECK(ret.IsOk() == true); - } - } - - /// Test the public function CountCollectedRequests - /// - /// It will count all request records in the thread_stats (and not modify - /// the thread_stats in any way) - /// - void TestCountCollectedRequests() - { - using time_point = std::chrono::time_point; - using ns = std::chrono::nanoseconds; - auto request_record1 = RequestRecord( - time_point(ns(1)), std::vector{time_point(ns(2))}, {}, {}, - 0, false, 0, false); - auto request_record2 = RequestRecord( - time_point(ns(3)), std::vector{time_point(ns(4))}, {}, {}, - 0, false, 0, false); - auto request_record3 = RequestRecord( - time_point(ns(5)), std::vector{time_point(ns(6))}, {}, {}, - 0, false, 0, false); - - SUBCASE("No threads") - { - CHECK(CountCollectedRequests() == 0); - } - SUBCASE("One thread") - { - auto stat1 = std::make_shared(); - stat1->request_records_.push_back(request_record1); - stat1->request_records_.push_back(request_record2); - stat1->request_records_.push_back(request_record3); - threads_stat_.push_back(stat1); - - CHECK(stat1->request_records_.size() == 3); - CHECK(CountCollectedRequests() == 3); - CHECK(stat1->request_records_.size() == 3); - } - SUBCASE("Multiple threads") - { - auto stat1 = std::make_shared(); - stat1->request_records_.push_back(request_record2); - - auto stat2 = std::make_shared(); - stat2->request_records_.push_back(request_record1); - stat2->request_records_.push_back(request_record3); - - threads_stat_.push_back(stat1); - threads_stat_.push_back(stat2); - - CHECK(stat1->request_records_.size() == 1); - CHECK(stat2->request_records_.size() == 2); - CHECK(CountCollectedRequests() == 3); - CHECK(stat1->request_records_.size() == 1); - CHECK(stat2->request_records_.size() == 2); - } - } - - void TestIdle() - { - auto stat1 = std::make_shared(); - auto stat2 = std::make_shared(); - threads_stat_.push_back(stat1); - threads_stat_.push_back(stat2); - - SUBCASE("All active") - { - // If multiple threads are active, their idle times are averaged - stat1->idle_timer.idle_ns_ = 5; - stat2->idle_timer.idle_ns_ = 7; - CHECK(GetIdleTime() == 6); - ResetIdleTime(); - CHECK(GetIdleTime() == 0); - } - - SUBCASE("One inactive") - { - // If a thread has no idle time, it is considered inactive and not - // factored in to the average - 
stat1->idle_timer.idle_ns_ = 0; - stat2->idle_timer.idle_ns_ = 7; - CHECK(GetIdleTime() == 7); - ResetIdleTime(); - CHECK(GetIdleTime() == 0); - } - } -}; - -TEST_CASE("load_manager_check_health: Test the public function CheckHealth()") -{ - TestLoadManager tlm(PerfAnalyzerParameters{}); - tlm.TestCheckHealth(); -} - -TEST_CASE( - "load_manager_swap_request_records: Test the public function " - "SwapRequestRecords()") -{ - TestLoadManager tlm(PerfAnalyzerParameters{}); - tlm.TestSwapRequestRecords(); -} - -TEST_CASE( - "load_manager_get_accumulated_client_stat: Test the public function " - "GetAccumulatedClientStat()") -{ - TestLoadManager tlm(PerfAnalyzerParameters{}); - tlm.TestGetAccumulatedClientStat(); -} - -TEST_CASE( - "load_manager_count_collected_requests: Test the public function " - "CountCollectedRequests()") -{ - TestLoadManager tlm(PerfAnalyzerParameters{}); - tlm.TestCountCollectedRequests(); -} - -TEST_CASE("load_manager_batch_size: Test the public function BatchSize()") -{ - PerfAnalyzerParameters params; - - SUBCASE("batch size 0") - { - params.batch_size = 0; - } - SUBCASE("batch size 1") - { - params.batch_size = 1; - } - SUBCASE("batch size 4") - { - params.batch_size = 4; - } - - TestLoadManager tlm(params); - CHECK(tlm.BatchSize() == params.batch_size); -} - -TEST_CASE("load_manager: Test public idle time functions") -{ - PerfAnalyzerParameters params; - TestLoadManager tlm(params); - tlm.TestIdle(); -} - -TEST_CASE( - "send_request_rate_load_manager: testing the GetAndResetNumSentRequests " - "function") -{ - PerfAnalyzerParameters params{}; - - TestLoadManager tlm(params); - - std::shared_ptr thread_stat_1{std::make_shared()}; - std::shared_ptr thread_stat_2{std::make_shared()}; - - std::chrono::steady_clock::time_point start_time{ - std::chrono::steady_clock::time_point::min()}; - - thread_stat_1->num_sent_requests_ = 6; - thread_stat_2->num_sent_requests_ = 5; - - tlm.threads_stat_ = {thread_stat_1, thread_stat_2}; - - const size_t result{tlm.GetAndResetNumSentRequests()}; - - CHECK(result == 11); - CHECK(tlm.threads_stat_.size() == 2); - CHECK(tlm.threads_stat_[0]->num_sent_requests_ == 0); - CHECK(tlm.threads_stat_[1]->num_sent_requests_ == 0); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_load_manager_base.h b/src/c++/perf_analyzer/test_load_manager_base.h deleted file mode 100644 index 6bbdf6d23..000000000 --- a/src/c++/perf_analyzer/test_load_manager_base.h +++ /dev/null @@ -1,305 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include - -#include "command_line_parser.h" -#include "doctest.h" -#include "mock_client_backend.h" -#include "mock_data_loader.h" -#include "mock_model_parser.h" -#include "sequence_manager.h" - -namespace cb = triton::perfanalyzer::clientbackend; - -namespace triton { namespace perfanalyzer { - -// Struct to hold the mock pieces to ingest custom json data -struct MockInputPipeline { - MockInputPipeline( - std::shared_ptr mmp, std::shared_ptr mdl) - : mock_model_parser_(mmp), mock_data_loader_(mdl) - { - } - std::shared_ptr mock_model_parser_; - std::shared_ptr mock_data_loader_; -}; - -/// Helper base class to be inherited when testing any Load Manager class -/// -class TestLoadManagerBase { - public: - TestLoadManagerBase() = default; - TestLoadManagerBase( - PerfAnalyzerParameters params, bool is_sequence_model, - bool is_decoupled_model) - : params_(params) - { - stats_ = std::make_shared(); - factory_ = std::make_shared(stats_); - parser_ = std::make_shared( - is_sequence_model, is_decoupled_model); - } - - ~TestLoadManagerBase() - { - // Reset early_exit in case any test sets it to true during execution. - early_exit = false; - } - - // Helper function to process custom json data in testing - // Creates a model tensor to pass to a mock parser which is consumed by the - // mock data loader - static MockInputPipeline ProcessCustomJsonData( - const std::string& json_str, const bool is_sequence_model = false) - { - std::shared_ptr mmp{ - std::make_shared(is_sequence_model, false)}; - ModelTensor model_tensor{}; - model_tensor.datatype_ = "INT32"; - model_tensor.is_optional_ = false; - model_tensor.is_shape_tensor_ = false; - model_tensor.name_ = "INPUT0"; - model_tensor.shape_ = {1}; - mmp->inputs_ = std::make_shared(); - (*mmp->inputs_)[model_tensor.name_] = model_tensor; - - std::shared_ptr mdl{std::make_shared()}; - mdl->ReadDataFromStr(json_str, mmp->Inputs(), mmp->Outputs()); - return MockInputPipeline{mmp, mdl}; - } - - // Set up all combinations of parameters for sequence testing - // - static PerfAnalyzerParameters GetSequenceTestParams() - { - PerfAnalyzerParameters params; - bool is_async; - - SUBCASE("Async sequence") - { - is_async = true; - params = GetSequenceTestParamsHelper(is_async); - } - SUBCASE("Sync sequence") - { - is_async = false; - params = GetSequenceTestParamsHelper(is_async); - } - return params; - } - - void CheckInferType() - { - auto stats = GetStats(); - - if (params_.async) { - if (params_.streaming) { - CHECK(stats->num_infer_calls == 0); - CHECK(stats->num_async_infer_calls == 0); - CHECK(stats->num_async_stream_infer_calls > 0); - CHECK(stats->num_start_stream_calls > 0); - } else { - CHECK(stats->num_infer_calls == 0); - CHECK(stats->num_async_infer_calls > 0); - CHECK(stats->num_async_stream_infer_calls == 0); - CHECK(stats->num_start_stream_calls == 0); - } - } else { - if (params_.streaming) { - CHECK(stats->num_infer_calls > 0); - CHECK(stats->num_async_infer_calls == 0); - 
CHECK(stats->num_async_stream_infer_calls == 0); - CHECK(stats->num_start_stream_calls > 0); - } else { - CHECK(stats->num_infer_calls > 0); - CHECK(stats->num_async_infer_calls == 0); - CHECK(stats->num_async_stream_infer_calls == 0); - CHECK(stats->num_start_stream_calls == 0); - } - } - } - - - void CheckSharedMemory( - const cb::MockClientStats::SharedMemoryStats& expected_stats) - { - auto actual_stats = GetStats(); - CHECK(expected_stats == actual_stats->memory_stats); - } - - void CheckSequences(uint64_t expected_num_seq) - { - auto stats = GetStats(); - - // Make sure no live sequences remain - CHECK(stats->sequence_status.live_seq_ids_to_length.size() == 0); - - // Make sure all seq IDs are within range - // - for (auto seq_id : stats->sequence_status.used_seq_ids) { - CHECK(seq_id >= params_.start_sequence_id); - CHECK(seq_id <= params_.start_sequence_id + params_.sequence_id_range); - } - - // Make sure that we had the correct number of concurrently live sequences - // - // If the sequence length is only 1 then there is nothing to check because - // there are never any overlapping requests -- they always immediately exit - // - if (params_.sequence_length != 1) { - expected_num_seq = std::min(expected_num_seq, params_.sequence_id_range); - CHECK(expected_num_seq == stats->sequence_status.max_live_seq_count); - } - - // Make sure that the length of each sequence is as expected - // - // All but X of them should be within 20% (The code explicitly has a 20% - // slop) of the requested sequence length, where X is the number of - // sequences (This is due to the shutdown of sequences at the end that will - // create shorter than expected sequences) - // - auto num_values = stats->sequence_status.seq_lengths.size(); - auto max_len = params_.sequence_length * 1.2; - auto min_len = params_.sequence_length * 0.8; - auto num_allowed_to_be_below_min_len = expected_num_seq; - auto num_below_min_len = 0; - - for (size_t i = 0; i < num_values; i++) { - auto len = stats->sequence_status.seq_lengths[i]; - - CHECK(len <= max_len); - if (len < min_len) { - num_below_min_len++; - } - } - CHECK(num_below_min_len <= num_allowed_to_be_below_min_len); - } - - std::shared_ptr stats_; - - protected: - PerfAnalyzerParameters params_; - std::shared_ptr factory_; - std::shared_ptr parser_; - - const std::shared_ptr& GetParser() { return parser_; } - const std::shared_ptr& GetFactory() - { - return factory_; - } - std::shared_ptr GetStats() { return stats_; } - void ResetStats() { stats_->Reset(); } - - // Verifies that the number of inferences for each sequence is n or n+1. 
- // - void CheckSequenceBalance() - { - auto first_value = -1; - auto second_value = -1; - - for (auto seq : stats_->sequence_status.seq_ids_to_count) { - auto count = seq.second; - // set first possible value for seqs - if (first_value == -1) { - first_value = count; - continue; - } - // set second possible value for seqs count - if (second_value == -1) { - if (count == first_value + 1 || count == first_value - 1) { - second_value = count; - continue; - } else if (first_value == count) { - continue; - } - } - - if (count != first_value || count != second_value) { - std::stringstream os; - os << "Sequence request counts were not balanced: "; - for (auto x : stats_->sequence_status.seq_ids_to_count) { - os << x.second << ","; - } - CHECK_MESSAGE( - (count == first_value || count == second_value), os.str()); - break; - } - } - } - - static PerfAnalyzerParameters GetSequenceTestParamsHelper(bool is_async) - { - PerfAnalyzerParameters params; - - params.async = is_async; - - // Generally we want short sequences for testing - // so we can hit the corner cases more often - // - params.sequence_length = 4; - params.max_concurrency = 8; - params.max_threads = 8; - - SUBCASE("Normal") {} - SUBCASE("sequence IDs test 1") - { - params.start_sequence_id = 1; - params.sequence_id_range = 3; - } - SUBCASE("sequence IDs test 2") - { - params.start_sequence_id = 17; - params.sequence_id_range = 8; - } - SUBCASE("num_of_sequences 1") - { - params.num_of_sequences = 1; - } - SUBCASE("less threads than seq") - { - params.num_of_sequences = 12; - } - SUBCASE("num_of_sequences 8") - { - params.num_of_sequences = 8; - // Make sequences long so we actually get 8 in flight at a time - params.sequence_length = 20; - } - SUBCASE("sequence_length 1") - { - params.sequence_length = 1; - } - SUBCASE("sequence_length 10") - { - params.sequence_length = 10; - } - return params; - } -}; -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_metrics_manager.cc b/src/c++/perf_analyzer/test_metrics_manager.cc deleted file mode 100644 index b6fb1eb7b..000000000 --- a/src/c++/perf_analyzer/test_metrics_manager.cc +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include - -#include "doctest.h" -#include "metrics_manager.h" - -namespace triton { namespace perfanalyzer { - -class TestMetricsManager : public MetricsManager { - public: - void CheckForMissingMetrics(const Metrics& metrics) - { - MetricsManager::CheckForMissingMetrics(metrics); - } - - void CheckForMetricIntervalTooShort( - const std::chrono::nanoseconds& remainder, - const std::chrono::nanoseconds& duration) - { - MetricsManager::CheckForMetricIntervalTooShort(remainder, duration); - } - - uint64_t& metrics_interval_ms_{MetricsManager::metrics_interval_ms_}; -}; - -TEST_CASE("testing the CheckForMissingMetrics function") -{ - TestMetricsManager tmm{}; - Metrics metrics{}; - std::stringstream captured_cerr; - std::streambuf* old_cerr{std::cerr.rdbuf(captured_cerr.rdbuf())}; - - // check that no warning gets printed when all metrics are present - metrics.gpu_utilization_per_gpu["gpu0"] = 0.5; - metrics.gpu_power_usage_per_gpu["gpu0"] = 50.0; - metrics.gpu_memory_used_bytes_per_gpu["gpu0"] = 1000; - metrics.gpu_memory_total_bytes_per_gpu["gpu0"] = 10000; - tmm.CheckForMissingMetrics(metrics); - CHECK(captured_cerr.str() == ""); - - // check that still no warning gets printed on a subsequent call - tmm.CheckForMissingMetrics(metrics); - CHECK(captured_cerr.str() == ""); - - // check that warning gets printed when missing metrics - metrics.gpu_utilization_per_gpu.clear(); - metrics.gpu_power_usage_per_gpu.clear(); - metrics.gpu_memory_used_bytes_per_gpu.clear(); - metrics.gpu_memory_total_bytes_per_gpu.clear(); - tmm.CheckForMissingMetrics(metrics); - CHECK( - captured_cerr.str() == - "WARNING: Unable to parse 'nv_gpu_utilization' metric.\n" - "WARNING: Unable to parse 'nv_gpu_power_usage' metric.\n" - "WARNING: Unable to parse 'nv_gpu_memory_used_bytes' metric.\n" - "WARNING: Unable to parse 'nv_gpu_memory_total_bytes' metric.\n"); - - // check that no additional warning gets printed on a subsequent call - tmm.CheckForMissingMetrics(metrics); - CHECK( - captured_cerr.str() == - "WARNING: Unable to parse 'nv_gpu_utilization' metric.\n" - "WARNING: Unable to parse 'nv_gpu_power_usage' metric.\n" - "WARNING: Unable to parse 'nv_gpu_memory_used_bytes' metric.\n" - "WARNING: Unable to parse 'nv_gpu_memory_total_bytes' metric.\n"); - - std::cerr.rdbuf(old_cerr); -} - -TEST_CASE("testing the CheckForMetricIntervalTooShort function") -{ - TestMetricsManager tmm{}; - tmm.metrics_interval_ms_ = 5; - std::chrono::nanoseconds remainder{}; - std::chrono::nanoseconds duration{}; - std::stringstream captured_cerr; - std::streambuf* old_cerr{std::cerr.rdbuf(captured_cerr.rdbuf())}; - - // check that no warning gets printed when interval is long enough - remainder = std::chrono::nanoseconds(2000000); - duration = std::chrono::nanoseconds(3000000); - tmm.CheckForMetricIntervalTooShort(remainder, duration); - CHECK(captured_cerr.str() == ""); - - // check that still no warning gets printed on a subsequent call - 
tmm.CheckForMetricIntervalTooShort(remainder, duration); - CHECK(captured_cerr.str() == ""); - - // check that warning gets printed when interval is too short - remainder = std::chrono::nanoseconds(-2000000); - duration = std::chrono::nanoseconds(7000000); - tmm.CheckForMetricIntervalTooShort(remainder, duration); - CHECK( - captured_cerr.str() == - "WARNING: Triton metrics endpoint latency (7ms) is larger than the " - "querying interval (5ms). Please try a larger querying interval via " - "`--triton-metrics-interval`.\n"); - - // check that no additional warning gets printed on a subsequent call - tmm.CheckForMetricIntervalTooShort(remainder, duration); - CHECK( - captured_cerr.str() == - "WARNING: Triton metrics endpoint latency (7ms) is larger than the " - "querying interval (5ms). Please try a larger querying interval via " - "`--triton-metrics-interval`.\n"); - - std::cerr.rdbuf(old_cerr); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_model_parser.cc b/src/c++/perf_analyzer/test_model_parser.cc deleted file mode 100644 index dabf8c9e2..000000000 --- a/src/c++/perf_analyzer/test_model_parser.cc +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
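
// Illustrative sketch (not the removed ModelParser implementation): the GetInt
// tests below expect this conversion behavior -- a JSON string is converted
// with a range check, a JSON integer is taken as-is, and anything else (such
// as a floating-point value) is rejected without touching the output. The
// helper name and the simplified bool return type are assumptions made for
// illustration only.

#include <cstdint>
#include <stdexcept>
#include <string>

#include "rapidjson/document.h"  // assumed include path

bool
GetIntSketch(const rapidjson::Value& value, int64_t* integer_value)
{
  if (value.IsString()) {
    try {
      *integer_value = std::stoll(value.GetString());
    }
    catch (const std::exception&) {
      return false;  // "unable to convert '...' to integer"
    }
    return true;
  }
  if (value.IsInt64()) {
    *integer_value = value.GetInt64();
    return true;
  }
  return false;  // "failed to parse the integer value"
}
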
- -#include - -#include - -#include "client_backend/client_backend.h" -#include "constants.h" -#include "doctest.h" -#include "mock_client_backend.h" -#include "mock_model_parser.h" - -namespace cb = triton::perfanalyzer::clientbackend; - -namespace triton { namespace perfanalyzer { - -class TestModelParser { - public: - constexpr static const char* no_batching = - R"({ "name": "NoBatchingModel", "platform":"not_ensemble" })"; - - constexpr static const char* seq_batching = - R"({ "name": "SeqBatchingModel", "platform":"not_ensemble", "sequence_batching":{} })"; - - constexpr static const char* dyn_batching = - R"({ "name": "DynBatchingModel", "platform":"not_ensemble", "dynamic_batching":{} })"; - - constexpr static const char* ensemble = R"({ - "name": "EnsembleModel", - "platform": "ensemble", - "ensemble_scheduling": { - "step": [{ - "model_name": "ModelA", - "model_version": 2 - }, - { - "model_name": "ModelB", - "model_version": -1 - } - ] - } - })"; - - constexpr static const char* nested_ensemble = R"({ - "name": "ModelA", - "platform": "ensemble", - "ensemble_scheduling": { - "step": [{ - "model_name": "ModelC", - "model_version": -1 - }, - { - "model_name": "ModelD", - "model_version": -1 - } - ] - } - })"; - - static cb::Error SetJsonPtrNoSeq(rapidjson::Document* model_config) - { - model_config->Parse(no_batching); - return cb::Error::Success; - }; - - static cb::Error SetJsonPtrYesSeq(rapidjson::Document* model_config) - { - model_config->Parse(seq_batching); - return cb::Error::Success; - }; - - static cb::Error SetJsonPtrNestedEnsemble(rapidjson::Document* model_config) - { - model_config->Parse(nested_ensemble); - return cb::Error::Success; - }; -}; - -TEST_CASE("ModelParser: testing the GetInt function") -{ - int64_t integer_value{0}; - MockModelParser mmp; - - SUBCASE("valid string") - { - rapidjson::Value value("100"); - cb::Error result{mmp.GetInt(value, &integer_value)}; - CHECK(result.Err() == SUCCESS); - CHECK(integer_value == 100); - } - - SUBCASE("invalid string, alphabet") - { - rapidjson::Value value("abc"); - cb::Error result{mmp.GetInt(value, &integer_value)}; - CHECK(result.Err() == GENERIC_ERROR); - CHECK(result.Message() == "unable to convert 'abc' to integer"); - CHECK(integer_value == 0); - } - - SUBCASE("invalid string, number out of range") - { - rapidjson::Value value("9223372036854775808"); - cb::Error result{mmp.GetInt(value, &integer_value)}; - CHECK(result.Err() == GENERIC_ERROR); - CHECK( - result.Message() == - "unable to convert '9223372036854775808' to integer"); - CHECK(integer_value == 0); - } - - SUBCASE("valid int, lowest Int64") - { - rapidjson::Value value(2147483648); - cb::Error result{mmp.GetInt(value, &integer_value)}; - CHECK(result.Err() == SUCCESS); - CHECK(integer_value == 2147483648); - } - - SUBCASE("valid int, highest Int32") - { - rapidjson::Value value(2147483647); - cb::Error result{mmp.GetInt(value, &integer_value)}; - CHECK(result.Err() == SUCCESS); - CHECK(integer_value == 2147483647); - } - - SUBCASE("invalid floating point") - { - rapidjson::Value value(100.1); - cb::Error result{mmp.GetInt(value, &integer_value)}; - CHECK(result.Err() == GENERIC_ERROR); - CHECK(result.Message() == "failed to parse the integer value"); - CHECK(integer_value == 0); - } -} - -TEST_CASE( - "ModelParser: DetermineComposingModelMap" * - doctest::description( - "This test confirms that the composing model map will be correctly " - "populated by DetermineComposingModelMap()")) -{ - std::shared_ptr stats = - std::make_shared(); - std::unique_ptr 
mock_backend = - std::make_unique(stats); - - rapidjson::Document config; - std::vector input_bls_composing_models; - ComposingModelMap expected_composing_model_map; - - std::string parent_model_name; - - - const auto& ParameterizeListedComposingModels{[&]() { - SUBCASE("No listed composing models") {} - SUBCASE("Yes listed composing models") - { - input_bls_composing_models.push_back({"ListedModelA", ""}); - input_bls_composing_models.push_back({"ListedModelB", ""}); - expected_composing_model_map[parent_model_name].emplace( - "ListedModelA", ""); - expected_composing_model_map[parent_model_name].emplace( - "ListedModelB", ""); - } - EXPECT_CALL(*mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillRepeatedly(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)); - }}; - - SUBCASE("No Ensemble") - { - config.Parse(TestModelParser::no_batching); - parent_model_name = "NoBatchingModel"; - ParameterizeListedComposingModels(); - } - SUBCASE("Ensemble") - { - config.Parse(TestModelParser::ensemble); - parent_model_name = "EnsembleModel"; - ParameterizeListedComposingModels(); - - expected_composing_model_map["EnsembleModel"].emplace("ModelA", "2"); - expected_composing_model_map["EnsembleModel"].emplace("ModelB", ""); - EXPECT_CALL(*mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillRepeatedly(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)); - } - SUBCASE("Nested Ensemble") - { - config.Parse(TestModelParser::ensemble); - parent_model_name = "EnsembleModel"; - ParameterizeListedComposingModels(); - - expected_composing_model_map["EnsembleModel"].emplace("ModelA", "2"); - expected_composing_model_map["EnsembleModel"].emplace("ModelB", ""); - expected_composing_model_map["ModelA"].emplace("ModelC", ""); - expected_composing_model_map["ModelA"].emplace("ModelD", ""); - EXPECT_CALL(*mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillOnce( - testing::WithArg<0>(TestModelParser::SetJsonPtrNestedEnsemble)) - .WillRepeatedly(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)); - } - SUBCASE("BLS with an Ensemble") - { - config.Parse(TestModelParser::no_batching); - parent_model_name = "NoBatchingModel"; - - input_bls_composing_models.push_back({"ModelA", ""}); - input_bls_composing_models.push_back({"ModelB", ""}); - - expected_composing_model_map[parent_model_name].emplace("ModelA", ""); - expected_composing_model_map[parent_model_name].emplace("ModelB", ""); - expected_composing_model_map["ModelA"].emplace("ModelC", ""); - expected_composing_model_map["ModelA"].emplace("ModelD", ""); - EXPECT_CALL(*mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillOnce( - testing::WithArg<0>(TestModelParser::SetJsonPtrNestedEnsemble)) - .WillRepeatedly(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)); - } - - std::unique_ptr backend = std::move(mock_backend); - - MockModelParser mmp; - - mmp.DetermineComposingModelMap(input_bls_composing_models, config, backend); - - auto actual_composing_model_map = *mmp.GetComposingModelMap().get(); - CHECK(actual_composing_model_map == expected_composing_model_map); - - // Destruct gmock objects to determine gmock-related test failure - backend.reset(); -} - -TEST_CASE( - "ModelParser: determining scheduler type" * - doctest::description("This test confirms that scheduler_type_ will be set " - "correctly by DetermineSchedulerType()")) -{ - std::shared_ptr stats = - std::make_shared(); - std::unique_ptr mock_backend = - std::make_unique(stats); - - - rapidjson::Document config; - 
ModelParser::ModelSchedulerType expected_type; - - ComposingModelMap input_composing_model_map; - - - SUBCASE("No batching") - { - config.Parse(TestModelParser::no_batching); - expected_type = ModelParser::ModelSchedulerType::NONE; - } - SUBCASE("Sequence batching") - { - config.Parse(TestModelParser::seq_batching); - expected_type = ModelParser::ModelSchedulerType::SEQUENCE; - } - SUBCASE("Dynamic batching") - { - config.Parse(TestModelParser::dyn_batching); - expected_type = ModelParser::ModelSchedulerType::DYNAMIC; - } - SUBCASE("Ensemble") - { - config.Parse(TestModelParser::ensemble); - - input_composing_model_map["EnsembleModel"].emplace("ModelA", "2"); - input_composing_model_map["EnsembleModel"].emplace("ModelB", ""); - - SUBCASE("no sequences") - { - EXPECT_CALL( - *mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)); - - expected_type = ModelParser::ModelSchedulerType::ENSEMBLE; - } - SUBCASE("yes sequences") - { - EXPECT_CALL( - *mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrYesSeq)); - - expected_type = ModelParser::ModelSchedulerType::ENSEMBLE_SEQUENCE; - } - } - SUBCASE("Nested Ensemble") - { - config.Parse(TestModelParser::ensemble); - - input_composing_model_map["EnsembleModel"].emplace("ModelA", "2"); - input_composing_model_map["EnsembleModel"].emplace("ModelB", ""); - input_composing_model_map["ModelA"].emplace("ModelC", ""); - input_composing_model_map["ModelA"].emplace("ModelD", ""); - - SUBCASE("no sequences") - { - EXPECT_CALL( - *mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillOnce( - testing::WithArg<0>(TestModelParser::SetJsonPtrNestedEnsemble)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)); - - expected_type = ModelParser::ModelSchedulerType::ENSEMBLE; - } - SUBCASE("yes sequences") - { - EXPECT_CALL( - *mock_backend, ModelConfig(testing::_, testing::_, testing::_)) - .WillOnce( - testing::WithArg<0>(TestModelParser::SetJsonPtrNestedEnsemble)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrYesSeq)) - .WillOnce(testing::WithArg<0>(TestModelParser::SetJsonPtrNoSeq)); - - expected_type = ModelParser::ModelSchedulerType::ENSEMBLE_SEQUENCE; - } - } - - std::unique_ptr backend = std::move(mock_backend); - - MockModelParser mmp; - mmp.composing_models_map_ = - std::make_shared(input_composing_model_map); - mmp.DetermineSchedulerType(config, backend); - - auto actual_type = mmp.SchedulerType(); - CHECK(actual_type == expected_type); - - // Destruct gmock objects to determine gmock-related test failure - backend.reset(); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_perf_utils.cc b/src/c++/perf_analyzer/test_perf_utils.cc deleted file mode 100644 index 74bf6afb4..000000000 --- a/src/c++/perf_analyzer/test_perf_utils.cc +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - -#include -#include - -#include "doctest.h" -#include "perf_utils.h" -#include "test_utils.h" - -namespace triton { namespace perfanalyzer { - -/// Helper class to test perf_utils.cc -/// -class TestPerfUtils { - public: - /// Given a distributionType and request rate, confirm that request pattern - /// matches what is expected. 
- /// - static void TestDistribution( - Distribution distribution_type, uint32_t request_rate) - { - std::mt19937 schedule_rng; - std::vector delays; - - double avg, variance; - double expected_avg, expected_variance; - - auto dist_func = GetDistributionFunction(distribution_type, request_rate); - - for (int i = 0; i < 100000; i++) { - auto delay = dist_func(schedule_rng); - delays.push_back(delay.count()); - } - - avg = CalculateAverage(delays); - variance = CalculateVariance(delays, avg); - - std::chrono::nanoseconds ns_in_one_second = - std::chrono::duration_cast( - std::chrono::seconds(1)); - expected_avg = ns_in_one_second.count() / request_rate; - - if (distribution_type == CONSTANT) { - expected_variance = 0; - } else { - // By definition, variance = mean for poisson - expected_variance = expected_avg; - } - - CHECK(avg == doctest::Approx(expected_avg).epsilon(0.005)); - CHECK(variance == doctest::Approx(expected_variance).epsilon(0.005)); - } - - - private: - static std::function - GetDistributionFunction(Distribution type, uint32_t request_rate) - { - std::function distributionFunction; - - if (type == CONSTANT) { - distributionFunction = ScheduleDistribution(request_rate); - } else if (type == POISSON) { - distributionFunction = ScheduleDistribution(request_rate); - } else { - throw std::invalid_argument("Unexpected distribution type"); - } - return distributionFunction; - } -}; - -/// Test all distributions across various request rates -/// -TEST_CASE("perf_utils: TestDistribution") -{ - std::vector distTypes{CONSTANT, POISSON}; - std::vector requestRates{10, 100, 1000, 10000}; - - for (auto dist : distTypes) { - for (auto rate : requestRates) { - TestPerfUtils::TestDistribution(dist, rate); - } - } -} - -TEST_CASE("perf_utils: ParseTensorFormat") -{ - CHECK(ParseTensorFormat("binary") == cb::TensorFormat::BINARY); - CHECK(ParseTensorFormat("BINARY") == cb::TensorFormat::BINARY); - CHECK(ParseTensorFormat("json") == cb::TensorFormat::JSON); - CHECK(ParseTensorFormat("JSON") == cb::TensorFormat::JSON); - CHECK(ParseTensorFormat("abc") == cb::TensorFormat::UNKNOWN); - CHECK(ParseTensorFormat("") == cb::TensorFormat::UNKNOWN); -} - -TEST_CASE("perf_utils: ParseProtocol") -{ - CHECK(ParseProtocol("HTTP") == cb::ProtocolType::HTTP); - CHECK(ParseProtocol("http") == cb::ProtocolType::HTTP); - CHECK(ParseProtocol("GRPC") == cb::ProtocolType::GRPC); - CHECK(ParseProtocol("grpc") == cb::ProtocolType::GRPC); - CHECK(ParseProtocol("hhtp") == cb::ProtocolType::UNKNOWN); - CHECK(ParseProtocol("") == cb::ProtocolType::UNKNOWN); - CHECK(ParseProtocol("http2") == cb::ProtocolType::UNKNOWN); -} - -TEST_CASE("perf_utils: ConvertDTypeFromTFS") -{ - std::string datatype; - cb::Error status; - - SUBCASE("Check for correct conversion") - { - std::vector> tf_to_datatype{ - std::make_pair("DT_HALF", "FP16"), - std::make_pair("DT_BFLOAT16", "BF16"), - std::make_pair("DT_FLOAT", "FP32"), - std::make_pair("DT_DOUBLE", "FP64"), - std::make_pair("DT_INT32", "INT32"), - std::make_pair("DT_INT16", "INT16"), - std::make_pair("DT_UINT16", "UINT16"), - std::make_pair("DT_INT8", "INT8"), - std::make_pair("DT_UINT8", "UINT8"), - std::make_pair("DT_STRING", "BYTES"), - std::make_pair("DT_INT64", "INT64"), - std::make_pair("DT_BOOL", "BOOL"), - std::make_pair("DT_UINT32", "UINT32"), - std::make_pair("DT_UINT64", "UINT64")}; - - for (const auto& type_pair : tf_to_datatype) { - status = ConvertDTypeFromTFS(type_pair.first, &datatype); - CHECK(status.IsOk()); - CHECK(datatype == type_pair.second); - } - } - - 
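
// Illustrative sketch (not the removed perf_utils implementation): the
// conversions checked above amount to a lookup from the TensorFlow Serving
// dtype string to the Triton datatype string, built from the same pairs the
// subcase lists. Anything not in the table -- including the lowercase variants
// in the next subcase -- is treated as an error that leaves the output unset.
// The helper name is hypothetical.

#include <string>
#include <unordered_map>

bool
ConvertDTypeFromTFSSketch(const std::string& tf_dtype, std::string* datatype)
{
  static const std::unordered_map<std::string, std::string> tf_to_triton{
      {"DT_HALF", "FP16"},     {"DT_BFLOAT16", "BF16"}, {"DT_FLOAT", "FP32"},
      {"DT_DOUBLE", "FP64"},   {"DT_INT32", "INT32"},   {"DT_INT16", "INT16"},
      {"DT_UINT16", "UINT16"}, {"DT_INT8", "INT8"},     {"DT_UINT8", "UINT8"},
      {"DT_STRING", "BYTES"},  {"DT_INT64", "INT64"},   {"DT_BOOL", "BOOL"},
      {"DT_UINT32", "UINT32"}, {"DT_UINT64", "UINT64"}};
  const auto it = tf_to_triton.find(tf_dtype);
  if (it == tf_to_triton.end()) {
    return false;  // unsupported datatype; *datatype is left unchanged
  }
  *datatype = it->second;
  return true;
}
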
SUBCASE("Invalid tensorflow datatype") - { - status = ConvertDTypeFromTFS("dt_bool", &datatype); - CHECK(!status.IsOk()); - CHECK(datatype == ""); - - status = ConvertDTypeFromTFS("dt_uint8", &datatype); - CHECK(!status.IsOk()); - CHECK(datatype == ""); - - status = ConvertDTypeFromTFS("abcdef", &datatype); - CHECK(!status.IsOk()); - CHECK(datatype == ""); - - status = ConvertDTypeFromTFS("", &datatype); - CHECK(!status.IsOk()); - CHECK(datatype == ""); - } -} - -TEST_CASE("perf_utils: IsDirectory") -{ - // Create a temporary directory /tmp/abcdef1234 - int status; - std::string temp_path{"/tmp/abcdef1234"}; - - CHECK(!IsDirectory(temp_path)); - - status = mkdir(temp_path.c_str(), S_IRWXU | S_IROTH | S_IXOTH); - REQUIRE(status == 0); - CHECK(IsDirectory(temp_path)); - - status = rmdir(temp_path.c_str()); - REQUIRE(status == 0); - CHECK(!IsDirectory(temp_path)); -} - -TEST_CASE("perf_utils: IsFile") -{ - // Create a temporary file /tmp/test.txt - int status; - std::string temp_path{"/tmp/test.txt"}; - - CHECK(!IsFile(temp_path)); - - std::ofstream file(temp_path); - CHECK(IsFile(temp_path)); - - std::remove(temp_path.c_str()); - CHECK(!IsFile(temp_path)); -} - -TEST_CASE("perf_utils: ByteSize") -{ - std::vector shape{3, 4, 5}; - constexpr int num_elements = 3 * 4 * 5; - - SUBCASE("Single byte elements") - { - CHECK(ByteSize(shape, "BOOL") == 1 * num_elements); - CHECK(ByteSize(shape, "INT8") == 1 * num_elements); - CHECK(ByteSize(shape, "UINT8") == 1 * num_elements); - } - - SUBCASE("2 byte elements") - { - CHECK(ByteSize(shape, "INT16") == 2 * num_elements); - CHECK(ByteSize(shape, "UINT16") == 2 * num_elements); - CHECK(ByteSize(shape, "FP16") == 2 * num_elements); - CHECK(ByteSize(shape, "BF16") == 2 * num_elements); - } - - SUBCASE("4 byte elements") - { - CHECK(ByteSize(shape, "INT32") == 4 * num_elements); - CHECK(ByteSize(shape, "UINT32") == 4 * num_elements); - CHECK(ByteSize(shape, "FP32") == 4 * num_elements); - } - - SUBCASE("8 byte elements") - { - CHECK(ByteSize(shape, "INT64") == 8 * num_elements); - CHECK(ByteSize(shape, "UINT64") == 8 * num_elements); - CHECK(ByteSize(shape, "FP64") == 8 * num_elements); - } - - SUBCASE("Dynamic shape tensor") - { - shape.insert(shape.begin(), -1); - - CHECK(ByteSize(shape, "BOOL") == -1); - CHECK(ByteSize(shape, "INT8") == -1); - CHECK(ByteSize(shape, "UINT8") == -1); - - CHECK(ByteSize(shape, "INT16") == -1); - CHECK(ByteSize(shape, "UINT16") == -1); - CHECK(ByteSize(shape, "FP16") == -1); - CHECK(ByteSize(shape, "BF16") == -1); - - CHECK(ByteSize(shape, "INT32") == -1); - CHECK(ByteSize(shape, "UINT32") == -1); - CHECK(ByteSize(shape, "FP32") == -1); - - CHECK(ByteSize(shape, "INT64") == -1); - CHECK(ByteSize(shape, "UINT64") == -1); - CHECK(ByteSize(shape, "FP64") == -1); - } - - SUBCASE("Unknown data types") - { - CHECK(ByteSize(shape, "bool") == -1); - CHECK(ByteSize(shape, "int8") == -1); - CHECK(ByteSize(shape, "uint8") == -1); - - CHECK(ByteSize(shape, "int16") == -1); - CHECK(ByteSize(shape, "uint16") == -1); - CHECK(ByteSize(shape, "fp16") == -1); - CHECK(ByteSize(shape, "bf16") == -1); - - CHECK(ByteSize(shape, "int32") == -1); - CHECK(ByteSize(shape, "uint32") == -1); - CHECK(ByteSize(shape, "fp32") == -1); - - CHECK(ByteSize(shape, "int64") == -1); - CHECK(ByteSize(shape, "uint64") == -1); - CHECK(ByteSize(shape, "fp64") == -1); - - CHECK(ByteSize(shape, "abc") == -1); - CHECK(ByteSize(shape, "1234") == -1); - CHECK(ByteSize(shape, "") == -1); - } -} - -TEST_CASE("perf_utils: ElementCount") -{ - std::vector shape{3, 4, 5}; - 
constexpr int num_elements = 3 * 4 * 5; - - SUBCASE("Static tensor shape") - { - CHECK(ElementCount(shape) == num_elements); - - shape.push_back(1); - CHECK(ElementCount(shape) == num_elements * 1); - - shape.push_back(300); - CHECK(ElementCount(shape) == num_elements * 1 * 300); - } - - SUBCASE("Dynamic tensor shape") - { - CHECK(ElementCount(shape) == num_elements); - - shape.push_back(-1); - CHECK(ElementCount(shape) == -1); - - shape.pop_back(); - shape.insert(shape.begin(), -1); - CHECK(ElementCount(shape) == -1); - } -} - -TEST_CASE("perf_utils: ShapeVecToString") -{ - std::vector shape{3, 4, 5}; - - SUBCASE("No skipping first dim") - { - CHECK(ShapeVecToString(shape, false) == "[3,4,5]"); - - shape.push_back(10); - CHECK(ShapeVecToString(shape, false) == "[3,4,5,10]"); - - shape.push_back(-1); - CHECK(ShapeVecToString(shape, false) == "[3,4,5,10,-1]"); - - shape.pop_back(); - shape.insert(shape.begin(), -1); - CHECK(ShapeVecToString(shape, false) == "[-1,3,4,5,10]"); - - shape.clear(); - CHECK(ShapeVecToString(shape, false) == "[]"); - } - - SUBCASE("Skipping first dim") - { - CHECK(ShapeVecToString(shape, true) == "[4,5]"); - - shape.push_back(-1); - CHECK(ShapeVecToString(shape, true) == "[4,5,-1]"); - - shape.pop_back(); - shape.insert(shape.begin(), -1); - CHECK(ShapeVecToString(shape, true) == "[3,4,5]"); - - shape.clear(); - CHECK(ShapeVecToString(shape, true) == "[]"); - } -} - -TEST_CASE("perf_utils: TensorToRegionName") -{ - CHECK(TensorToRegionName("name/with/slash") == "namewithslash"); - CHECK(TensorToRegionName("name//with//slash") == "namewithslash"); - CHECK(TensorToRegionName("name\\with\\backslash") == "namewithbackslash"); - CHECK(TensorToRegionName("name\\\\with\\\\backslash") == "namewithbackslash"); - CHECK(TensorToRegionName("name_without_slash") == "name_without_slash"); - CHECK(TensorToRegionName("abc123!@#") == "abc123!@#"); - CHECK(TensorToRegionName("") == ""); -} - - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_profile_data_collector.cc b/src/c++/perf_analyzer/test_profile_data_collector.cc deleted file mode 100644 index 926a90151..000000000 --- a/src/c++/perf_analyzer/test_profile_data_collector.cc +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "doctest.h" -#include "mock_profile_data_collector.h" -#include "profile_data_collector.h" - -namespace triton { namespace perfanalyzer { - -TEST_CASE("profile_data_collector: FindExperiment") -{ - MockProfileDataCollector collector{}; - InferenceLoadMode infer_mode1{10, 20.0}; - - std::vector::iterator it; - it = collector.FindExperiment(infer_mode1); - CHECK(it == collector.experiments_.end()); - - std::vector request_records{RequestRecord{}}; - collector.AddData(infer_mode1, std::move(request_records)); - - it = collector.FindExperiment(infer_mode1); - CHECK(it != collector.experiments_.end()); - CHECK((*it).mode == infer_mode1); - - InferenceLoadMode infer_mode2{123, 0.0}; - it = collector.FindExperiment(infer_mode2); - CHECK(it == collector.experiments_.end()); -} - -TEST_CASE("profile_data_collector: AddData") -{ - using std::chrono::nanoseconds; - using std::chrono::system_clock; - using std::chrono::time_point; - - MockProfileDataCollector collector{}; - InferenceLoadMode infer_mode{10, 20.0}; - - // Add RequestRecords - auto clock_epoch{time_point()}; - - uint64_t sequence_id1{123}; - auto request1_timestamp{clock_epoch + nanoseconds(1)}; - auto request1_response1_timestamp{clock_epoch + nanoseconds(2)}; - auto request1_response2_timestamp{clock_epoch + nanoseconds(3)}; - uint8_t fake_data_in[] = {0x01, 0x02, 0x03, 0x04}; - uint8_t fake_data_out[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}; - RequestRecord::RequestInput request1_request_input{ - {"key1", RecordData(fake_data_in, 1)}, - {"key2", RecordData(fake_data_in, 2)}}; - RequestRecord::ResponseOutput request1_response1_output{ - {"key1", RecordData(fake_data_out, 1)}, - {"key2", RecordData(fake_data_out, 2)}}; - RequestRecord::ResponseOutput request1_response2_output{ - {"key3", RecordData(fake_data_out, 3)}, - {"key4", RecordData(fake_data_out, 4)}}; - - RequestRecord request_record1{ - request1_timestamp, - std::vector>{ - request1_response1_timestamp, request1_response2_timestamp}, - {request1_request_input}, - {request1_response1_output, request1_response2_output}, - 0, - false, - sequence_id1, - false}; - - uint64_t sequence_id2{456}; - auto request2_timestamp{clock_epoch + nanoseconds(4)}; - auto request2_response1_timestamp{clock_epoch + nanoseconds(5)}; - auto request2_response2_timestamp{clock_epoch + nanoseconds(6)}; - RequestRecord::RequestInput request2_request_input{ - {"key3", RecordData(fake_data_in, 3)}, - {"key4", RecordData(fake_data_in, 4)}}; - RequestRecord::ResponseOutput request2_response1_output{ - {"key5", RecordData(fake_data_out, 5)}, - {"key6", RecordData(fake_data_out, 6)}}; - RequestRecord::ResponseOutput request2_response2_output{ - {"key7", RecordData(fake_data_out, 7)}, - {"key8", RecordData(fake_data_out, 8)}}; - - RequestRecord request_record2{ - request2_timestamp, - std::vector>{ - request2_response1_timestamp, request2_response2_timestamp}, - {request2_request_input}, - {request2_response1_output, request2_response2_output}, - 0, - 
false, - sequence_id2, - false}; - - std::vector request_records{request_record1, request_record2}; - collector.AddData(infer_mode, std::move(request_records)); - - CHECK(!collector.experiments_.empty()); - - std::vector rr{collector.experiments_[0].requests}; - CHECK(rr[0].sequence_id_ == sequence_id1); - CHECK(rr[0].start_time_ == request1_timestamp); - CHECK(rr[0].request_inputs_[0] == request1_request_input); - CHECK(rr[0].response_timestamps_[0] == request1_response1_timestamp); - CHECK(rr[0].response_timestamps_[1] == request1_response2_timestamp); - CHECK(rr[0].response_outputs_[0] == request1_response1_output); - CHECK(rr[0].response_outputs_[1] == request1_response2_output); - CHECK(rr[1].sequence_id_ == sequence_id2); - CHECK(rr[1].start_time_ == request2_timestamp); - CHECK(rr[1].request_inputs_[0] == request2_request_input); - CHECK(rr[1].response_timestamps_[0] == request2_response1_timestamp); - CHECK(rr[1].response_timestamps_[1] == request2_response2_timestamp); - CHECK(rr[1].response_outputs_[0] == request2_response1_output); - CHECK(rr[1].response_outputs_[1] == request2_response2_output); -} - -TEST_CASE("profile_data_collector: AddWindow") -{ - MockProfileDataCollector collector{}; - InferenceLoadMode infer_mode{10, 20.0}; - - uint64_t window_start1{123}; - uint64_t window_end1{456}; - collector.AddWindow(infer_mode, window_start1, window_end1); - - CHECK(!collector.experiments_.empty()); - CHECK(collector.experiments_[0].window_boundaries[0] == window_start1); - CHECK(collector.experiments_[0].window_boundaries[1] == window_end1); - - uint64_t window_start2{678}; - uint64_t window_end2{912}; - collector.AddWindow(infer_mode, window_start2, window_end2); - - CHECK(collector.experiments_[0].window_boundaries[2] == window_start2); - CHECK(collector.experiments_[0].window_boundaries[3] == window_end2); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_profile_data_exporter.cc b/src/c++/perf_analyzer/test_profile_data_exporter.cc deleted file mode 100644 index ffd958c5c..000000000 --- a/src/c++/perf_analyzer/test_profile_data_exporter.cc +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "doctest.h" -#include "mock_profile_data_exporter.h" -#include "profile_data_exporter.h" - -namespace triton { namespace perfanalyzer { - -TEST_CASE("profile_data_exporter: ConvertToJson") -{ - using std::chrono::nanoseconds; - using std::chrono::system_clock; - using std::chrono::time_point; - - MockProfileDataExporter exporter{}; - - InferenceLoadMode infer_mode{4, 0.0}; - uint64_t sequence_id{1}; - - auto clock_epoch{time_point()}; - auto request_timestamp{clock_epoch + nanoseconds(1)}; - auto response_timestamp1{clock_epoch + nanoseconds(2)}; - auto response_timestamp2{clock_epoch + nanoseconds(3)}; - - // Request inputs - const std::string in_buf1{"abc123"}; - const int32_t in_buf2{456}; - const bool in_buf3{true}; - const std::string in_buf4{"{\"abc\":\"def\"}"}; - - RequestRecord::RequestInput request_input{ - {"in_key1", - {reinterpret_cast(in_buf1.data()), in_buf1.size(), - "BYTES"}}, - {"in_key2", - {reinterpret_cast(&in_buf2), sizeof(in_buf2), "INT32"}}, - {"in_key3", - {reinterpret_cast(&in_buf3), sizeof(in_buf3), "BOOL"}}, - {"in_key4", - {reinterpret_cast(in_buf4.data()), sizeof(in_buf4), - "JSON"}}, - }; - - // Response outputs - std::vector out_bufs{"abc", "def", "ghi", "jkl"}; - RequestRecord::ResponseOutput response_output1{ - {"out_key1", - {reinterpret_cast(out_bufs[0].data()), - out_bufs[0].size()}}, - {"out_key2", - {reinterpret_cast(out_bufs[1].data()), - out_bufs[1].size()}}}; - RequestRecord::ResponseOutput response_output2{ - {"out_key3", - {reinterpret_cast(out_bufs[2].data()), - out_bufs[2].size()}}, - {"out_key4", - {reinterpret_cast(out_bufs[3].data()), - out_bufs[3].size()}}}; - - RequestRecord request_record{ - request_timestamp, - std::vector>{ - response_timestamp1, response_timestamp2}, - {request_input}, - {response_output1, response_output2}, - 0, - false, - sequence_id, - false}; - std::vector requests{request_record}; - std::vector window_boundaries{1, 5, 6}; - - Experiment experiment; - experiment.mode = infer_mode; - experiment.requests = requests; - experiment.window_boundaries = window_boundaries; - std::vector experiments{experiment}; - - std::string version{"1.2.3"}; - cb::BackendKind service_kind = cb::BackendKind::TRITON; - std::string endpoint{""}; - - exporter.ConvertToJson(experiments, version, service_kind, endpoint); - - std::string json{R"( - { - "experiments" : [ - { - "experiment" : { - "mode" : "concurrency", - "value" : 4 - }, - "requests" : [ - { - "timestamp" : 1, - "sequence_id" : 1, - "request_inputs" : {"in_key1":"abc123","in_key2":456,"in_key3":true,"in_key4":"{\"abc\":\"def\"}"}, - "response_timestamps" : [ 2, 3 ], - "response_outputs" : [ {"out_key1":"abc","out_key2":"def"}, {"out_key3":"ghi","out_key4":"jkl"} ] - } - ], - "window_boundaries" : [ 1, 5, 6 ] - } - ], - "version" : "1.2.3", - "service_kind": "triton", - "endpoint": "" - } - )"}; - - rapidjson::Document expected_document; - expected_document.Parse(json.c_str()); - - // FIXME (TMA-1339): Look into the testing the order 
of things in the json - const rapidjson::Value& expected_experiment{ - expected_document["experiments"][0]["experiment"]}; - const rapidjson::Value& expected_request{ - expected_document["experiments"][0]["requests"][0]}; - const rapidjson::Value& expected_windows{ - expected_document["experiments"][0]["window_boundaries"]}; - const rapidjson::Value& expected_version{expected_document["version"]}; - - const rapidjson::Value& actual_experiment{ - exporter.document_["experiments"][0]["experiment"]}; - const rapidjson::Value& actual_request{ - exporter.document_["experiments"][0]["requests"][0]}; - const rapidjson::Value& actual_windows{ - exporter.document_["experiments"][0]["window_boundaries"]}; - const rapidjson::Value& actual_version{exporter.document_["version"]}; - - CHECK(actual_experiment["mode"] == expected_experiment["mode"]); - CHECK(actual_experiment["value"] == expected_experiment["value"]); - - CHECK(actual_request["timestamp"] == expected_request["timestamp"]); - CHECK(actual_request["sequence_id"] == expected_request["sequence_id"]); - - CHECK( - actual_request["request_inputs"]["in_key1"] == - expected_request["request_inputs"]["in_key1"]); - CHECK( - actual_request["request_inputs"]["in_key2"] == - expected_request["request_inputs"]["in_key2"]); - CHECK( - actual_request["request_inputs"]["in_key3"] == - expected_request["request_inputs"]["in_key3"]); - auto act_inkey_4 = actual_request["request_inputs"]["in_key4"].GetString(); - auto exp_inkey_4 = expected_request["request_inputs"]["in_key4"].GetString(); - CHECK(std::string{act_inkey_4} == std::string{exp_inkey_4}); - - CHECK( - actual_request["response_timestamps"][0] == - expected_request["response_timestamps"][0]); - CHECK( - actual_request["response_timestamps"][1] == - expected_request["response_timestamps"][1]); - CHECK( - actual_request["response_outputs"][0] == - expected_request["response_outputs"][0]); - CHECK( - actual_request["response_outputs"][1] == - expected_request["response_outputs"][1]); - - CHECK(actual_windows[0] == expected_windows[0]); - CHECK(actual_windows[1] == expected_windows[1]); - CHECK(actual_windows[2] == expected_windows[2]); - - CHECK(actual_version == expected_version); -} - -TEST_CASE("profile_data_exporter: AddExperiment") -{ - MockProfileDataExporter exporter{}; - - Experiment raw_experiment; - rapidjson::Value entry(rapidjson::kObjectType); - rapidjson::Value experiment(rapidjson::kObjectType); - - SUBCASE("Concurrency mode") - { - InferenceLoadMode infer_mode{15, 0.0}; - raw_experiment.mode = infer_mode; - - exporter.AddExperiment(entry, experiment, raw_experiment); - CHECK(entry.HasMember("experiment")); - CHECK(entry["experiment"]["mode"] == "concurrency"); - CHECK(entry["experiment"]["value"] == 15); - } - - SUBCASE("Request rate mode") - { - InferenceLoadMode infer_mode{0, 23.5}; - raw_experiment.mode = infer_mode; - - exporter.AddExperiment(entry, experiment, raw_experiment); - CHECK(entry.HasMember("experiment")); - CHECK(entry["experiment"]["mode"] == "request_rate"); - CHECK(entry["experiment"]["value"] == 23.5); - } -} - -TEST_CASE("profile_data_exporter: OutputToFile") -{ - MockProfileDataExporter exporter{}; - std::string file_path; - - SUBCASE("Empty file path") - { - file_path = ""; - CHECK_THROWS_WITH_AS( - exporter.OutputToFile(file_path), - "failed to open file for outputting raw profile data", - PerfAnalyzerException); - } - - SUBCASE("With file path") - { - file_path = "/tmp/test-" + GetRandomString(4) + ".json"; - CHECK_NOTHROW(exporter.OutputToFile(file_path)); - 
CHECK(IsFile(file_path)); - - std::remove(file_path.c_str()); - CHECK(!IsFile(file_path)); - } -} - -TEST_CASE("profile_data_exporter: AddServiceKind") -{ - MockProfileDataExporter exporter{}; - exporter.ClearDocument(); - - cb::BackendKind service_kind; - std::string json{""}; - - SUBCASE("Backend kind: TRITON") - { - service_kind = cb::BackendKind::TRITON; - json = R"({ "service_kind": "triton" })"; - } - - SUBCASE("Backend kind: TENSORFLOW_SERVING") - { - service_kind = cb::BackendKind::TENSORFLOW_SERVING; - json = R"({ "service_kind": "tfserving" })"; - } - - SUBCASE("Backend kind: TORCHSERVE") - { - service_kind = cb::BackendKind::TORCHSERVE; - json = R"({ "service_kind": "torchserve" })"; - } - - SUBCASE("Backend kind: TRITON_C_API") - { - service_kind = cb::BackendKind::TRITON_C_API; - json = R"({ "service_kind": "triton_c_api" })"; - } - - SUBCASE("Backend kind: OPENAI") - { - service_kind = cb::BackendKind::OPENAI; - json = R"({ "service_kind": "openai" })"; - } - - exporter.AddServiceKind(service_kind); - rapidjson::Document expected_document; - expected_document.Parse(json.c_str()); - - const rapidjson::Value& expected_kind{expected_document["service_kind"]}; - const rapidjson::Value& actual_kind{exporter.document_["service_kind"]}; - CHECK(actual_kind == expected_kind); -} - -TEST_CASE("profile_data_exporter: AddEndpoint") -{ - MockProfileDataExporter exporter{}; - exporter.ClearDocument(); - - std::string endpoint{""}; - std::string json{""}; - - SUBCASE("Endpoint: OpenAI Chat Completions") - { - endpoint = "v1/chat/completions"; - json = R"({ "endpoint": "v1/chat/completions" })"; - } - - SUBCASE("Endpoint: OpenAI Completions") - { - endpoint = "v1/completions"; - json = R"({ "endpoint": "v1/completions" })"; - } - - exporter.AddEndpoint(endpoint); - rapidjson::Document expected_document; - expected_document.Parse(json.c_str()); - - const rapidjson::Value& expected_endpoint{expected_document["endpoint"]}; - const rapidjson::Value& actual_endpoint{exporter.document_["endpoint"]}; - CHECK(actual_endpoint == expected_endpoint); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_report_writer.cc b/src/c++/perf_analyzer/test_report_writer.cc deleted file mode 100644 index 5d341c30a..000000000 --- a/src/c++/perf_analyzer/test_report_writer.cc +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - -#include "doctest.h" -#include "report_writer.h" - -namespace triton { namespace perfanalyzer { - -class TestReportWriter : ReportWriter { - public: - void WriteGpuMetrics(std::ostream& ofs, const Metrics& metrics) - { - ReportWriter::WriteGpuMetrics(ofs, metrics); - } -}; - -TEST_CASE("testing WriteGpuMetrics") -{ - TestReportWriter trw{}; - Metrics m{}; - m.gpu_utilization_per_gpu["a"] = 1.0; - m.gpu_power_usage_per_gpu["a"] = 2.2; - m.gpu_memory_used_bytes_per_gpu["a"] = 3; - m.gpu_memory_total_bytes_per_gpu["a"] = 4; - std::ostringstream actual_output{}; - - SUBCASE("single gpu complete output") - { - trw.WriteGpuMetrics(actual_output, m); - const std::string expected_output{",a:1;,a:2.2;,a:3;,a:4;"}; - CHECK(actual_output.str() == expected_output); - } - - SUBCASE("single gpu missing data") - { - m.gpu_power_usage_per_gpu.erase("a"); - trw.WriteGpuMetrics(actual_output, m); - const std::string expected_output{",a:1;,,a:3;,a:4;"}; - CHECK(actual_output.str() == expected_output); - } - - SUBCASE("multi-gpu") - { - m.gpu_utilization_per_gpu["z"] = 100.0; - m.gpu_power_usage_per_gpu["z"] = 222.2; - m.gpu_memory_used_bytes_per_gpu["z"] = 45; - m.gpu_memory_total_bytes_per_gpu["z"] = 89; - - SUBCASE("multi gpu complete output") - { - trw.WriteGpuMetrics(actual_output, m); - const std::string expected_output{ - ",a:1;z:100;,a:2.2;z:222.2;,a:3;z:45;,a:4;z:89;"}; - CHECK(actual_output.str() == expected_output); - } - - SUBCASE("multi gpu missing data") - { - m.gpu_utilization_per_gpu.erase("z"); - m.gpu_power_usage_per_gpu.erase("a"); - trw.WriteGpuMetrics(actual_output, m); - const std::string expected_output{",a:1;,z:222.2;,a:3;z:45;,a:4;z:89;"}; - CHECK(actual_output.str() == expected_output); - } - } -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_request_rate_manager.cc b/src/c++/perf_analyzer/test_request_rate_manager.cc deleted file mode 100644 index 07b9016dd..000000000 --- a/src/c++/perf_analyzer/test_request_rate_manager.cc +++ /dev/null @@ -1,2242 +0,0 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - -#include "command_line_parser.h" -#include "common.h" -#include "doctest.h" -#include "mock_client_backend.h" -#include "mock_data_loader.h" -#include "mock_infer_data_manager.h" -#include "mock_model_parser.h" -#include "mock_request_rate_worker.h" -#include "mock_sequence_manager.h" -#include "request_rate_manager.h" -#include "test_load_manager_base.h" -#include "test_utils.h" - -namespace cb = triton::perfanalyzer::clientbackend; -using milliseconds = std::chrono::milliseconds; -using nanoseconds = std::chrono::nanoseconds; - -namespace triton { namespace perfanalyzer { - -/// Class to test the RequestRateManager -/// -class TestRequestRateManager : public TestLoadManagerBase, - public RequestRateManager { - public: - TestRequestRateManager( - PerfAnalyzerParameters params, bool is_sequence_model = false, - bool is_decoupled_model = false, bool use_mock_infer = false) - : use_mock_infer_(use_mock_infer), - TestLoadManagerBase(params, is_sequence_model, is_decoupled_model), - RequestRateManager( - params.async, params.streaming, params.request_distribution, - params.batch_size, params.measurement_window_ms, params.max_trials, - params.max_threads, params.num_of_sequences, - params.shared_memory_type, params.output_shm_size, - params.serial_sequences, GetParser(), GetFactory(), - params.request_parameters) - { - } - - std::shared_ptr MakeWorker( - std::shared_ptr thread_stat, - std::shared_ptr thread_config) override - { - size_t id = workers_.size(); - auto worker = std::make_shared( - id, thread_stat, thread_config, parser_, data_loader_, factory_, - on_sequence_model_, async_, max_threads_, using_json_data_, streaming_, - batch_size_, wake_signal_, wake_mutex_, execute_, start_time_, - serial_sequences_, infer_data_manager_, sequence_manager_); - - if (use_mock_infer_) { - EXPECT_CALL(*worker, Infer()) - .WillRepeatedly(testing::Invoke( - worker.get(), &MockRequestRateWorker::EmptyInfer)); - } - return worker; - } - - void TestConfigureThreads( - std::vector& expected_configs, size_t request_count) - { - RequestRateManager::ConfigureThreads(request_count); - - auto expected_size = expected_configs.size(); - - // Check that the correct number of threads are created - // - CHECK(threads_.size() == expected_size); - - // Check that threads_config has correct number of sequences and - // seq stat index offset - for (auto i = 0; i < expected_configs.size(); i++) { - CHECK( - threads_config_[i]->num_sequences_ == - expected_configs[i].num_sequences_); - CHECK( - threads_config_[i]->seq_stat_index_offset_ == - expected_configs[i].seq_stat_index_offset_); - CHECK( - threads_config_[i]->num_requests_ == - expected_configs[i].num_requests_); - } - } - - void TestCalculateThreadIds(std::vector& 
expected_thread_ids) - { - std::vector actual_thread_ids = - RequestRateManager::CalculateThreadIds(); - CHECK(actual_thread_ids.size() == expected_thread_ids.size()); - - for (auto i = 0; i < actual_thread_ids.size(); i++) { - CHECK(actual_thread_ids[i] == expected_thread_ids[i]); - } - } - - void StopWorkerThreads() { LoadManager::StopWorkerThreads(); } - - void TestSchedule(double rate, PerfAnalyzerParameters params) - { - PauseWorkers(); - ConfigureThreads(); - GenerateSchedule(rate); - - nanoseconds measurement_window_nanoseconds{ - params.measurement_window_ms * NANOS_PER_MILLIS}; - nanoseconds max_test_duration{ - measurement_window_nanoseconds * params.max_trials}; - - nanoseconds expected_time_between_requests{int(NANOS_PER_SECOND / rate)}; - nanoseconds expected_current_timestamp{0}; - - // Keep calling GetNextTimestamp for the entire test_duration to make sure - // the schedule is exactly as expected - // - while (expected_current_timestamp < max_test_duration) { - for (auto worker : workers_) { - expected_current_timestamp += expected_time_between_requests; - auto timestamp = std::dynamic_pointer_cast(worker) - ->GetNextTimestamp(); - REQUIRE(timestamp.count() == expected_current_timestamp.count()); - } - } - early_exit = true; - } - - void TestCreateSchedule( - double rate, PerfAnalyzerParameters params, - std::vector& expected_worker_ratio) - { - PauseWorkers(); - ConfigureThreads(); - GenerateSchedule(rate); - - std::vector worker_schedule_sizes; - uint32_t total_num_seqs{0}; - - for (auto worker : workers_) { - auto w = std::dynamic_pointer_cast(worker); - total_num_seqs += w->thread_config_->num_sequences_; - worker_schedule_sizes.push_back(w->schedule_->intervals.size()); - } - early_exit = true; - - CHECK(num_of_sequences_ == total_num_seqs); - for (int i = 0; i < worker_schedule_sizes.size() - 1; i++) { - CHECK( - worker_schedule_sizes[i] / expected_worker_ratio[i] == - worker_schedule_sizes[i + 1] / expected_worker_ratio[i + 1]); - } - } - - /// Test that the correct Infer function is called in the backend - /// - void TestInferType() - { - double request_rate = 50; - auto sleep_time = milliseconds(100); - - ChangeRequestRate(request_rate); - std::this_thread::sleep_for(sleep_time); - StopWorkerThreads(); - - CheckInferType(); - } - - /// Test that the inference distribution is as expected - /// - void TestDistribution(uint request_rate, uint duration_ms) - { - ChangeRequestRate(request_rate); - std::this_thread::sleep_for(milliseconds(duration_ms)); - StopWorkerThreads(); - - CheckCallDistribution(request_rate); - } - - /// Test that the schedule is properly update after calling ChangeRequestRate - /// - void TestMultipleRequestRate() - { - std::vector request_rates = {50, 200}; - auto sleep_time = milliseconds(500); - - for (auto request_rate : request_rates) { - ChangeRequestRate(request_rate); - ResetStats(); - std::this_thread::sleep_for(sleep_time); - CheckCallDistribution(request_rate); - } - } - - /// Test sequence handling - /// - void TestSequences(bool verify_seq_balance, bool check_expected_count) - { - stats_->SetDelays({10}); - double request_rate1 = 100; - double request_rate2 = 200; - - // A single sequence can't maintain the above rates - // - if (params_.num_of_sequences == 1) { - request_rate1 = 50; - request_rate2 = 100; - } - - auto stats = cb::InferStat(); - int sleep_ms = 500; - double num_seconds = double(sleep_ms) / 1000; - - auto sleep_time = milliseconds(sleep_ms); - size_t expected_count1 = num_seconds * request_rate1; - size_t 
expected_count2 = num_seconds * request_rate2 + expected_count1; - - // Run and check request rate 1 - // - ChangeRequestRate(request_rate1); - std::this_thread::sleep_for(sleep_time); - - stats = cb::InferStat(); - GetAccumulatedClientStat(&stats); - if (check_expected_count) { - CHECK( - stats.completed_request_count == - doctest::Approx(expected_count1).epsilon(0.10)); - } - - PauseWorkers(); - CheckSequences(params_.num_of_sequences); - - // Make sure that the client and the manager are in agreement on the request - // count in between rates - // - stats = cb::InferStat(); - GetAccumulatedClientStat(&stats); - int client_total_requests = stats_->num_async_infer_calls + - stats_->num_async_stream_infer_calls + - stats_->num_infer_calls; - CHECK(stats.completed_request_count == client_total_requests); - - if (verify_seq_balance) { - CheckSequenceBalance(); - } - - ResetStats(); - - // Run and check request rate 2 - // - ChangeRequestRate(request_rate2); - std::this_thread::sleep_for(sleep_time); - - stats = cb::InferStat(); - GetAccumulatedClientStat(&stats); - if (check_expected_count) { - CHECK( - stats.completed_request_count == - doctest::Approx(expected_count2).epsilon(0.10)); - } - - // Stop all threads and make sure everything is as expected - // - StopWorkerThreads(); - - CheckSequences(params_.num_of_sequences); - } - - /// Test that the shared memory methods are called correctly - /// - void TestSharedMemory(uint request_rate, uint duration_ms) - { - ChangeRequestRate(request_rate); - std::this_thread::sleep_for(milliseconds(duration_ms)); - StopWorkerThreads(); - } - - /// Test that tries to find deadlocks and livelocks - /// - void TestTimeouts() - { - TestWatchDog watchdog(1000); - ChangeRequestRate(100); - std::this_thread::sleep_for(milliseconds(100)); - StopWorkerThreads(); - watchdog.stop(); - } - - /// Test that idle time is tracked correctly - void TestOverhead(uint request_rate) - { - stats_->SetDelays({1}); - ChangeRequestRate(request_rate); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - // During a run of 100 ms (100,000,000 ns), make sure that the idle time is - // at least 95% of that - // - auto idle_time_ns = GetIdleTime(); - CHECK(idle_time_ns > 95000000); - StopWorkerThreads(); - } - - /// Helper function that will setup and run a case to verify custom data - /// behavior - /// \param num_requests Integer number of requests to send during the test - /// \param num_threads Number of worker threads to create - /// \param tensors Vector of input ModelTensors - /// \param json_str The custom data json text - /// \param expected_values Vector of expected input values for each inference - /// \param expect_init_failure True if InitManager is expected to throw an - /// error - /// \param expect_thread_failure True if the thread is expected to have - /// an error - void TestCustomData( - size_t num_requests, size_t num_threads, - std::vector& tensors, const std::string json_str, - std::vector>& expected_values, - bool expect_init_failure, bool expect_thread_failure) - { - CustomDataTestSetup(tensors, json_str, expect_init_failure, num_threads); - if (expect_init_failure) { - // The rest of the test is invalid if init failed - return; - } - auto thread_status = CustomDataTestSendRequests(num_requests, num_threads); - CustomDataTestCheckResults( - thread_status, expect_thread_failure, expected_values); - } - - void CustomDataTestSetup( - std::vector& tensors, const std::string json_str, - bool expect_init_failure, size_t num_threads) - { - 
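// Setup sequence (summarized): the JSON string stands in for the user-supplied
// input-data file, a MockDataLoader and a MockModelParser seeded with the
// given ModelTensors replace the real loader/parser, and a mock
// InferDataManager matching the configured shared-memory mode is installed.
// When expect_init_failure is true, InitManager must throw a
// PerfAnalyzerException and setup stops there; otherwise it must complete
// without throwing before any requests are sent.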
params_.user_data = {json_str}; - - std::shared_ptr mdl{ - std::make_shared(params_.batch_size)}; - - std::shared_ptr mmp{ - std::make_shared(on_sequence_model_, false)}; - mmp->inputs_ = std::make_shared(); - for (auto t : tensors) { - (*mmp->inputs_)[t.name_] = t; - } - - infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params_.max_threads, params_.batch_size, params_.shared_memory_type, - params_.output_shm_size, params_.request_parameters, mmp, factory_, - mdl); - - parser_ = mmp; - data_loader_ = mdl; - using_json_data_ = true; - execute_ = true; - max_threads_ = num_threads; - - if (expect_init_failure) { - REQUIRE_THROWS_AS( - InitManager( - params_.string_length, params_.string_data, params_.zero_input, - params_.user_data, params_.start_sequence_id, - params_.sequence_id_range, params_.sequence_length, - params_.sequence_length_specified, - params_.sequence_length_variation), - PerfAnalyzerException); - return; - } else { - REQUIRE_NOTHROW(InitManager( - params_.string_length, params_.string_data, params_.zero_input, - params_.user_data, params_.start_sequence_id, - params_.sequence_id_range, params_.sequence_length, - params_.sequence_length_specified, - params_.sequence_length_variation)); - } - } - - cb::Error CustomDataTestSendRequests(size_t num_requests, size_t num_threads) - { - std::vector> workers; - std::vector> thread_stats; - - for (auto i = 0; i < num_threads; i++) { - std::shared_ptr ts{std::make_shared()}; - thread_stats.push_back(ts); - std::shared_ptr tc{std::make_shared(i)}; - std::shared_ptr worker{MakeWorker(ts, tc)}; - workers_.push_back(worker); - - workers.push_back( - std::dynamic_pointer_cast(worker)); - - workers[i]->CreateContext(); - } - - size_t sent_requests = 0; - while (sent_requests < num_requests) { - for (auto i = 0; i < workers.size(); i++) { - workers[i]->SendInferRequest(); - sent_requests++; - } - } - - return thread_stats[0]->status_; - } - - void CustomDataTestCheckResults( - cb::Error& thread_status, bool expect_thread_failure, - std::vector>& expected_values) - { - if (expect_thread_failure) { - REQUIRE(!thread_status.IsOk()); - } else { - REQUIRE_MESSAGE(thread_status.IsOk(), thread_status.Message()); - } - - auto recorded_values = GetRecordedInputValues(); - - // Check that results are exactly as expected - REQUIRE(recorded_values.size() == expected_values.size()); - for (size_t i = 0; i < expected_values.size(); i++) { - REQUIRE(recorded_values[i].size() == expected_values[i].size()); - for (size_t j = 0; j < expected_values[i].size(); j++) { - CHECK(recorded_values[i][j] == expected_values[i][j]); - } - } - } - - std::shared_ptr& parser_{LoadManager::parser_}; - std::shared_ptr& data_loader_{LoadManager::data_loader_}; - std::shared_ptr& sequence_manager_{ - LoadManager::sequence_manager_}; - bool& using_json_data_{LoadManager::using_json_data_}; - bool& execute_{RequestRateManager::execute_}; - size_t& batch_size_{LoadManager::batch_size_}; - std::chrono::steady_clock::time_point& start_time_{ - RequestRateManager::start_time_}; - size_t& max_threads_{LoadManager::max_threads_}; - bool& async_{LoadManager::async_}; - bool& streaming_{LoadManager::streaming_}; - std::shared_ptr& factory_{ - TestLoadManagerBase::factory_}; - std::shared_ptr& infer_data_manager_{ - LoadManager::infer_data_manager_}; - - private: - bool use_mock_infer_; - - void CheckCallDistribution(int request_rate) - { - auto request_distribution = params_.request_distribution; - - auto timestamps = GetStats()->request_timestamps; - 
std::vector time_delays = GatherTimeBetweenRequests(timestamps); - - double delay_average = CalculateAverage(time_delays); - double delay_variance = CalculateVariance(time_delays, delay_average); - - double expected_delay_average = - NANOS_PER_SECOND / static_cast(request_rate); - - if (request_distribution == POISSON) { - // By definition, variance == average for Poisson. - // - // With such a small sample size for a poisson distribution, there will be - // noise. Allow 5% slop - // - CHECK( - delay_average == - doctest::Approx(expected_delay_average).epsilon(0.05)); - CHECK(delay_variance == doctest::Approx(delay_average).epsilon(0.05)); - } else if (request_distribution == CONSTANT) { - // constant should in theory have 0 variance, but with thread timing - // there is obviously some noise. - // - // Allow it to be at most 5% of average - // - auto max_allowed_delay_variance = 0.05 * delay_average; - - // Constant should be pretty tight. Allowing 1% slop there is noise in the - // thread scheduling - // - CHECK( - delay_average == - doctest::Approx(expected_delay_average).epsilon(0.1)); - CHECK_LT(delay_variance, max_allowed_delay_variance); - } else { - throw std::invalid_argument("Unexpected distribution type"); - } - } - - std::vector GatherTimeBetweenRequests( - const std::vector>& - timestamps) - { - std::vector time_between_requests; - - for (size_t i = 1; i < timestamps.size(); i++) { - auto diff = timestamps[i] - timestamps[i - 1]; - nanoseconds diff_ns = std::chrono::duration_cast(diff); - time_between_requests.push_back(diff_ns.count()); - } - return time_between_requests; - } - - // Gets the inputs recorded in the mock backend - // Returns a vector of vector of int32_t. Each entry in the parent vector is a - // list of all input values for a single inference request - // - std::vector> GetRecordedInputValues() - { - auto recorded_inputs{stats_->recorded_inputs}; - std::vector> recorded_values; - // Convert the recorded inputs into values, for both shared memory and non - // shared memory cases - // - if (params_.shared_memory_type != SharedMemoryType::NO_SHARED_MEMORY) { - auto recorded_memory_regions = - std::dynamic_pointer_cast( - infer_data_manager_) - ->mocked_shared_memory_regions; - for (auto recorded_input : recorded_inputs) { - std::vector recorded_value; - for (auto memory_label : recorded_input) { - auto itr = - recorded_memory_regions.find(memory_label.shared_memory_label); - if (itr == recorded_memory_regions.end()) { - std::string err_str = "Test error: Could not find label " + - memory_label.shared_memory_label + - " in recorded shared memory"; - REQUIRE_MESSAGE(false, err_str); - } else { - for (auto val : itr->second) { - recorded_value.push_back(val); - } - } - } - recorded_values.push_back(recorded_value); - } - } else { - for (auto recorded_input : recorded_inputs) { - std::vector recorded_value; - for (auto val : recorded_input) { - recorded_value.push_back(val.data); - } - recorded_values.push_back(recorded_value); - } - } - return recorded_values; - } - - std::shared_ptr MakeSequenceManager( - const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t sequence_length, const bool sequence_length_specified, - const double sequence_length_variation, const bool using_json_data, - std::shared_ptr data_loader) override - { - return std::make_shared( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - } -}; - 
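The TEST_CASEs that follow build their parameter grids by nesting doctest SUBCASEs inside small local lambdas (ParameterizeRate, ParameterizeThreads, ParameterizeTrials, and so on), so doctest re-runs the test body once per combination. A minimal, self-contained sketch of that pattern is given below for reference; the test name, variable names, and checks are illustrative assumptions and are not part of the removed file.

#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest.h"

TEST_CASE("illustrative: nested SUBCASE parameterization")
{
  double rate = 0;
  size_t threads = 0;

  // Innermost axis: the request rate under test.
  const auto& ParameterizeRate{[&]() {
    SUBCASE("rate 10") { rate = 10; }
    SUBCASE("rate 100") { rate = 100; }
  }};

  // The outer axis invokes the inner lambda from each of its SUBCASEs, so
  // doctest expands the full cross product (2 rates x 2 thread counts = 4 runs).
  const auto& ParameterizeThreads{[&]() {
    SUBCASE("threads 1")
    {
      ParameterizeRate();
      threads = 1;
    }
    SUBCASE("threads 4")
    {
      ParameterizeRate();
      threads = 4;
    }
  }};

  ParameterizeThreads();

  // Every expanded run sees exactly one (rate, threads) combination.
  CHECK(rate > 0);
  CHECK(threads > 0);
}

The same composition is what lets cases such as "request_rate_schedule" below cover every measurement-window/trial/thread/rate combination without duplicating the body of the test.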
-TEST_CASE("request_rate_schedule") -{ - PerfAnalyzerParameters params; - params.measurement_window_ms = 1000; - params.max_trials = 10; - bool is_sequence = false; - bool is_decoupled = false; - bool use_mock_infer = true; - double rate; - - - const auto& ParameterizeRate{[&]() { - SUBCASE("rate 10") - { - rate = 10; - } - SUBCASE("rate 30") - { - rate = 30; - } - SUBCASE("rate 100") - { - rate = 100; - } - }}; - - const auto& ParameterizeThreads{[&]() { - SUBCASE("threads 1") - { - ParameterizeRate(); - params.max_threads = 1; - } - SUBCASE("threads 2") - { - ParameterizeRate(); - params.max_threads = 2; - } - SUBCASE("threads 4") - { - ParameterizeRate(); - params.max_threads = 4; - } - SUBCASE("threads 7") - { - ParameterizeRate(); - params.max_threads = 7; - } - }}; - - const auto& ParameterizeTrials{[&]() { - SUBCASE("trials 3") - { - ParameterizeThreads(); - params.max_trials = 3; - } - SUBCASE("trials 10") - { - ParameterizeThreads(); - params.max_trials = 10; - } - SUBCASE("trials 20") - { - ParameterizeThreads(); - params.max_trials = 20; - } - }}; - - const auto& ParameterizeMeasurementWindow{[&]() { - SUBCASE("window 1000") - { - ParameterizeTrials(); - params.measurement_window_ms = 1000; - } - SUBCASE("window 10000") - { - ParameterizeTrials(); - params.measurement_window_ms = 10000; - } - SUBCASE("window 500") - { - ParameterizeTrials(); - params.measurement_window_ms = 500; - } - }}; - - ParameterizeMeasurementWindow(); - - TestRequestRateManager trrm( - params, is_sequence, is_decoupled, use_mock_infer); - - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestSchedule(rate, params); -} - -/// Check that the correct inference function calls -/// are used given different param values for async and stream -/// -TEST_CASE("request_rate_infer_type") -{ - bool async; - bool stream; - - SUBCASE("async_stream") - { - async = true; - stream = true; - } - SUBCASE("async_no_stream") - { - async = true; - stream = false; - } - SUBCASE("no_async_stream") - { - async = false; - stream = true; - } - SUBCASE("no_async_no_stream") - { - async = false; - stream = false; - } - - PerfAnalyzerParameters params; - params.async = async; - params.streaming = stream; - - TestRequestRateManager trrm(params, false); - - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestInferType(); -} - -/// Check that the request distribution is correct for -/// different Distribution types -/// -TEST_CASE("request_rate_distribution") -{ - PerfAnalyzerParameters params; - uint request_rate = 500; - uint duration_ms = 1000; - - SUBCASE("constant") - { - params.request_distribution = CONSTANT; - } - SUBCASE("poisson") - { - params.request_distribution = POISSON; - } - - TestRequestRateManager trrm(params); - - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestDistribution(request_rate, duration_ms); -} - -/// Check that the request distribution is correct -/// for the case where the measurement 
window is tiny. -/// -TEST_CASE("request_rate_tiny_window") -{ - PerfAnalyzerParameters params; - params.request_distribution = CONSTANT; - params.measurement_window_ms = 10; - params.max_trials = 100; - uint request_rate = 500; - uint duration_ms = 1000; - - - SUBCASE("one_thread") - { - params.max_threads = 1; - } - SUBCASE("odd_threads") - { - params.max_threads = 9; - } - - - TestRequestRateManager trrm(params); - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestDistribution(request_rate, duration_ms); -} - -/// Check that the schedule properly handles mid-test -/// update to the request rate -/// -TEST_CASE("request_rate_multiple") -{ - PerfAnalyzerParameters params{}; - TestRequestRateManager trrm(PerfAnalyzerParameters{}); - - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestMultipleRequestRate(); -} - -/// Check that the inference requests for sequences -/// follow all rules and parameters -/// -TEST_CASE("request_rate_sequence") -{ - PerfAnalyzerParameters params = TestLoadManagerBase::GetSequenceTestParams(); - bool verify_seq_balance = false; - bool check_expected_count = true; - bool is_sequence_model = true; - - TestRequestRateManager trrm(params, is_sequence_model); - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestSequences(verify_seq_balance, check_expected_count); -} - -TEST_CASE("request_rate_serial_sequences") -{ - PerfAnalyzerParameters params; - params.serial_sequences = true; - bool verify_seq_balance = false; - bool check_expected_count = true; - bool is_sequence_model = true; - - const auto& ParameterizeDistribution{[&]() { - SUBCASE("Constant") - { - params.request_distribution = CONSTANT; - } - SUBCASE("Poisson") - { - params.request_distribution = POISSON; - check_expected_count = false; - } - }}; - - SUBCASE("num seqs 7, threads 4") - { - verify_seq_balance = true; - params.sequence_length = 100; - params.num_of_sequences = 7; - params.max_threads = 4; - ParameterizeDistribution(); - } - SUBCASE("num seqs 13, threads 5") - { - verify_seq_balance = true; - params.sequence_length = 100; - params.num_of_sequences = 13; - params.max_threads = 5; - ParameterizeDistribution(); - } - - TestRequestRateManager trrm(params, is_sequence_model); - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestSequences(verify_seq_balance, check_expected_count); -} - -TEST_CASE("request_rate max inflight per seq") -{ - // Confirm that we can have multiple inferences in-flight for a given sequence - // unless in serial-sequence mode - PerfAnalyzerParameters params; - bool is_sequence_model = true; - params.num_of_sequences = 2; - size_t rate = 1000; - size_t time_ms = 10; - - bool expect_multiple_in_flight_sequences = false; - - SUBCASE("sync will never 
have multiple in flight") - { - params.async = false; - expect_multiple_in_flight_sequences = false; - - SUBCASE("serial_sequences on") - { - params.serial_sequences = true; - } - SUBCASE("serial_sequences off") - { - params.serial_sequences = false; - } - } - SUBCASE("async may have multiple in flight depending on serial sequences") - { - params.async = true; - - SUBCASE("serial_sequences on") - { - params.serial_sequences = true; - expect_multiple_in_flight_sequences = false; - } - SUBCASE("serial_sequences off") - { - params.serial_sequences = false; - expect_multiple_in_flight_sequences = true; - } - } - - TestRequestRateManager trrm(params, is_sequence_model); - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - trrm.stats_->SetDelays({100}); - - trrm.ChangeRequestRate(rate); - std::this_thread::sleep_for(std::chrono::milliseconds(time_ms)); - - auto max_observed_inflight = - trrm.stats_->sequence_status.max_inflight_seq_count; - - if (expect_multiple_in_flight_sequences) { - CHECK(max_observed_inflight > 1); - } else { - CHECK(max_observed_inflight == 1); - } - - trrm.StopWorkerThreads(); -} - - -TEST_CASE("request_rate_streaming: test that streaming-specific logic works") -{ - bool is_sequence = false; - bool is_decoupled; - bool expected_enable_stats_value; - - SUBCASE("enable_stats true") - { - is_decoupled = false; - expected_enable_stats_value = true; - } - SUBCASE("enable_stats false") - { - is_decoupled = true; - expected_enable_stats_value = false; - } - - PerfAnalyzerParameters params{}; - params.streaming = true; - - RateSchedulePtr_t schedule = std::make_shared(); - schedule->intervals = NanoIntervals{nanoseconds(1)}; - schedule->duration = nanoseconds{1}; - - std::shared_ptr thread_stat{std::make_shared()}; - std::shared_ptr thread_config{ - std::make_shared(0)}; - - TestRequestRateManager trrm(params, is_sequence, is_decoupled); - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - auto worker = trrm.MakeWorker(thread_stat, thread_config); - std::dynamic_pointer_cast(worker)->SetSchedule(schedule); - std::future infer_future{std::async(&IWorker::Infer, worker)}; - - early_exit = true; - infer_future.get(); - - CHECK( - trrm.stats_->start_stream_enable_stats_value == - expected_enable_stats_value); -} - -TEST_CASE( - "custom_json_data: Check custom json data to ensure that it is processed " - "correctly") -{ - PerfAnalyzerParameters params{}; - params.user_data = {"fake_file.json"}; - bool is_sequence_model{false}; - - std::vector> expected_results; - std::vector tensors; - bool expect_init_failure = false; - bool expect_thread_failure = false; - - ModelTensor model_tensor1{}; - model_tensor1.datatype_ = "INT32"; - model_tensor1.is_optional_ = false; - model_tensor1.is_shape_tensor_ = false; - model_tensor1.name_ = "INPUT1"; - model_tensor1.shape_ = {1}; - - ModelTensor model_tensor2 = model_tensor1; - model_tensor2.name_ = "INPUT2"; - - size_t num_requests = 4; - size_t num_threads = 1; - std::string json_str; - - const auto& ParameterizeTensors{[&]() { - SUBCASE("one tensor") - { - tensors.push_back(model_tensor1); - - json_str = R"({ - "data": [ - { "INPUT1": [1] }, - { 
"INPUT1": [2] }, - { "INPUT1": [3] } - ]})"; - - switch (params.batch_size) { - case 1: - expected_results = {{1}, {2}, {3}, {1}}; - break; - case 2: - expected_results = {{1, 2}, {3, 1}, {2, 3}, {1, 2}}; - break; - case 4: - expected_results = { - {1, 2, 3, 1}, {2, 3, 1, 2}, {3, 1, 2, 3}, {1, 2, 3, 1}}; - break; - default: - REQUIRE(false); - } - } - SUBCASE("two tensors") - { - tensors.push_back(model_tensor1); - tensors.push_back(model_tensor2); - - json_str = R"({ - "data": [ - { "INPUT1": [1], "INPUT2": [21] }, - { "INPUT1": [2], "INPUT2": [22] }, - { "INPUT1": [3], "INPUT2": [23] } - ]})"; - - switch (params.batch_size) { - case 1: - expected_results = {{1, 21}, {2, 22}, {3, 23}, {1, 21}}; - break; - case 2: - expected_results = { - {1, 2, 21, 22}, {3, 1, 23, 21}, {2, 3, 22, 23}, {1, 2, 21, 22}}; - break; - case 4: - expected_results = { - {1, 2, 3, 1, 21, 22, 23, 21}, - {2, 3, 1, 2, 22, 23, 21, 22}, - {3, 1, 2, 3, 23, 21, 22, 23}, - {1, 2, 3, 1, 21, 22, 23, 21}}; - break; - default: - REQUIRE(false); - } - } - }}; - - const auto& ParameterizeBatchSize{[&]() { - SUBCASE("batchsize = 1") - { - params.batch_size = 1; - ParameterizeTensors(); - } - SUBCASE("batchsize = 2") - { - params.batch_size = 2; - ParameterizeTensors(); - } - SUBCASE("batchsize = 4") - { - params.batch_size = 4; - ParameterizeTensors(); - } - }}; - - const auto& ParameterizeSharedMemory{[&]() { - SUBCASE("no_shared_memory") - { - params.shared_memory_type = SharedMemoryType::NO_SHARED_MEMORY; - ParameterizeBatchSize(); - } - SUBCASE("system_shared_memory") - { - params.shared_memory_type = SharedMemoryType::SYSTEM_SHARED_MEMORY; - ParameterizeBatchSize(); - } - SUBCASE("cuda_shared_memory") - { - params.shared_memory_type = SharedMemoryType::CUDA_SHARED_MEMORY; - ParameterizeBatchSize(); - } - }}; - - const auto& ParameterizeNumThreads{[&]() { - SUBCASE("1 thread") - { - num_threads = 1; - ParameterizeSharedMemory(); - } - SUBCASE("2 threads") - { - num_threads = 2; - ParameterizeSharedMemory(); - } - }}; - - ParameterizeNumThreads(); - - TestRequestRateManager trrm(params, is_sequence_model); - - trrm.TestCustomData( - num_requests, num_threads, tensors, json_str, expected_results, - expect_init_failure, expect_thread_failure); -} - -TEST_CASE("custom_json_data: handling is_shape_tensor") -{ - // Test the case where is_shape_tensor is true and is the same - // across a batch: it only ends up in each batch once - PerfAnalyzerParameters params{}; - params.user_data = {"fake_file.json"}; - bool is_sequence_model{false}; - - std::vector> expected_results; - std::vector tensors; - bool expect_init_failure = false; - bool expect_thread_failure = false; - - ModelTensor model_tensor1{}; - model_tensor1.datatype_ = "INT32"; - model_tensor1.is_optional_ = false; - model_tensor1.is_shape_tensor_ = false; - model_tensor1.name_ = "INPUT1"; - model_tensor1.shape_ = {1}; - - ModelTensor model_tensor2 = model_tensor1; - model_tensor2.name_ = "INPUT2"; - - std::string json_str{R"({ - "data": [ - { "INPUT1": [1], "INPUT2": [21] }, - { "INPUT1": [1], "INPUT2": [22] }, - { "INPUT1": [1], "INPUT2": [23] } - ]})"}; - - model_tensor1.is_shape_tensor_ = true; - model_tensor2.is_optional_ = true; - - size_t num_requests = 4; - size_t num_threads = 1; - - const auto& ParameterizeBatch{[&]() { - SUBCASE("batch 1") - { - params.batch_size = 1; - expected_results = {{1, 21}, {1, 22}, {1, 23}, {1, 21}}; - } - SUBCASE("batch 2") - { - params.batch_size = 2; - expected_results = {{1, 21, 22}, {1, 23, 21}, {1, 22, 23}, {1, 21, 22}}; - } - 
SUBCASE("batch 4") - { - params.batch_size = 4; - expected_results = { - {1, 21, 22, 23, 21}, - {1, 22, 23, 21, 22}, - {1, 23, 21, 22, 23}, - {1, 21, 22, 23, 21}}; - } - }}; - - const auto& ParameterizeNumThreads{[&]() { - SUBCASE("1 thread") - { - num_threads = 1; - ParameterizeBatch(); - } - SUBCASE("2 threads") - { - num_threads = 2; - ParameterizeBatch(); - } - }}; - - // Being optional should have no impact - SUBCASE("optional = 0,0") - { - model_tensor1.is_optional_ = false; - model_tensor2.is_optional_ = false; - ParameterizeNumThreads(); - } - SUBCASE("optional = 0,1") - { - model_tensor1.is_optional_ = false; - model_tensor2.is_optional_ = true; - ParameterizeNumThreads(); - } - SUBCASE("optional = 1,0") - { - model_tensor1.is_optional_ = true; - model_tensor2.is_optional_ = false; - ParameterizeNumThreads(); - } - SUBCASE("optional = 1,1") - { - model_tensor1.is_optional_ = true; - model_tensor2.is_optional_ = true; - ParameterizeNumThreads(); - } - - - TestRequestRateManager trrm(params, is_sequence_model); - - tensors.push_back(model_tensor1); - tensors.push_back(model_tensor2); - - trrm.TestCustomData( - num_requests, num_threads, tensors, json_str, expected_results, - expect_init_failure, expect_thread_failure); -} - -TEST_CASE("custom_json_data: handling missing optional is_shape_tensor") -{ - // Test the case where is_shape_tensor is true and is_optional_ is true - // and data for that input is completely omitted - PerfAnalyzerParameters params{}; - params.user_data = {"fake_file.json"}; - bool is_sequence_model{false}; - - std::vector> expected_results; - std::vector tensors; - bool expect_init_failure = false; - bool expect_thread_failure = false; - - ModelTensor model_tensor1{}; - model_tensor1.datatype_ = "INT32"; - model_tensor1.is_optional_ = true; - model_tensor1.is_shape_tensor_ = true; - model_tensor1.name_ = "INPUT1"; - model_tensor1.shape_ = {1}; - - ModelTensor model_tensor2 = model_tensor1; - model_tensor2.is_shape_tensor_ = false; - model_tensor2.is_optional_ = false; - model_tensor2.name_ = "INPUT2"; - - std::string json_str{R"({ - "data": [ - { "INPUT2": [21] }, - { "INPUT2": [22] }, - { "INPUT2": [23] } - ]})"}; - - - size_t num_requests = 4; - size_t num_threads = 1; - - const auto& ParameterizeBatch{[&]() { - SUBCASE("batch 1") - { - params.batch_size = 1; - expected_results = {{21}, {22}, {23}, {21}}; - } - SUBCASE("batch 2") - { - params.batch_size = 2; - expected_results = {{21, 22}, {23, 21}, {22, 23}, {21, 22}}; - } - SUBCASE("batch 4") - { - params.batch_size = 4; - expected_results = { - {21, 22, 23, 21}, - {22, 23, 21, 22}, - {23, 21, 22, 23}, - {21, 22, 23, 21}}; - } - }}; - - const auto& ParameterizeNumThreads{[&]() { - SUBCASE("1 thread") - { - num_threads = 1; - ParameterizeBatch(); - } - SUBCASE("2 threads") - { - num_threads = 2; - ParameterizeBatch(); - } - }}; - - SUBCASE("no shm") - { - params.shared_memory_type = SharedMemoryType::NO_SHARED_MEMORY; - ParameterizeNumThreads(); - } - SUBCASE("system shm") - { - params.shared_memory_type = SharedMemoryType::SYSTEM_SHARED_MEMORY; - ParameterizeNumThreads(); - expect_init_failure = true; - } - SUBCASE("cuda shm") - { - params.shared_memory_type = SharedMemoryType::CUDA_SHARED_MEMORY; - ParameterizeNumThreads(); - expect_init_failure = true; - } - - TestRequestRateManager trrm(params, is_sequence_model); - - tensors.push_back(model_tensor1); - tensors.push_back(model_tensor2); - - trrm.TestCustomData( - num_requests, num_threads, tensors, json_str, expected_results, - expect_init_failure, 
expect_thread_failure); -} - -TEST_CASE("custom_json_data: handling invalid is_shape_tensor") -{ - PerfAnalyzerParameters params{}; - params.user_data = {"fake_file.json"}; - bool is_sequence_model{false}; - - std::vector> expected_results; - std::vector tensors; - bool expect_init_failure = false; - bool expect_thread_failure = false; - - ModelTensor model_tensor1{}; - model_tensor1.datatype_ = "INT32"; - model_tensor1.is_optional_ = true; - model_tensor1.is_shape_tensor_ = true; - model_tensor1.name_ = "INPUT1"; - model_tensor1.shape_ = {1}; - - ModelTensor model_tensor2 = model_tensor1; - model_tensor2.name_ = "INPUT2"; - - size_t num_requests = 4; - size_t num_threads = 1; - - std::string json_str; - - - const auto& ParameterizeJson{[&]() { - SUBCASE("different data") - { - json_str = R"({ - "data": [ - { "INPUT1": [1], "INPUT2": [21] }, - { "INPUT1": [2], "INPUT2": [22] }, - { "INPUT1": [3], "INPUT2": [23] } - ]})"; - expected_results = {{1, 21}, {2, 22}, {3, 23}, {1, 21}}; - } - SUBCASE("missing data") - { - json_str = R"({ - "data": [ - { "INPUT2": [21] }, - { "INPUT2": [22] } - ]})"; - expected_results = {{21}, {22}, {21}, {22}}; - } - }}; - - const auto& ParameterizeNumThreads{[&]() { - SUBCASE("1 thread") - { - num_threads = 1; - ParameterizeJson(); - } - SUBCASE("2 threads") - { - num_threads = 2; - ParameterizeJson(); - } - }}; - - SUBCASE("no batching is ok") - { - params.batch_size = 1; - ParameterizeNumThreads(); - } - SUBCASE("batching - no shm") - { - params.batch_size = 2; - params.shared_memory_type = SharedMemoryType::NO_SHARED_MEMORY; - expect_init_failure = true; - ParameterizeNumThreads(); - } - SUBCASE("batching - shm") - { - params.batch_size = 2; - params.shared_memory_type = SharedMemoryType::SYSTEM_SHARED_MEMORY; - expect_init_failure = true; - ParameterizeNumThreads(); - } - - TestRequestRateManager trrm(params, is_sequence_model); - - tensors.push_back(model_tensor1); - tensors.push_back(model_tensor2); - - trrm.TestCustomData( - num_requests, num_threads, tensors, json_str, expected_results, - expect_init_failure, expect_thread_failure); -} - - -TEST_CASE("custom_json_data: handling of optional tensors") -{ - PerfAnalyzerParameters params{}; - params.user_data = {"fake_file.json"}; - bool is_sequence_model{false}; - - std::vector> expected_results; - std::vector tensors; - bool expect_init_failure = false; - bool expect_thread_failure = false; - - ModelTensor model_tensor1{}; - model_tensor1.datatype_ = "INT32"; - model_tensor1.is_optional_ = false; - model_tensor1.is_shape_tensor_ = false; - model_tensor1.name_ = "INPUT1"; - model_tensor1.shape_ = {1}; - - ModelTensor model_tensor2 = model_tensor1; - model_tensor2.name_ = "INPUT2"; - - std::string json_str{R"({ - "data": [ - { "INPUT1": [1] }, - { "INPUT1": [2], "INPUT2": [22] }, - { "INPUT1": [3] } - ]})"}; - - size_t num_requests = 4; - size_t num_threads = 1; - - const auto& ParameterizeNumThreads{[&]() { - SUBCASE("1 thread") - { - num_threads = 1; - } - SUBCASE("2 threads") - { - num_threads = 2; - } - }}; - - SUBCASE("normal") - { - model_tensor2.is_optional_ = true; - params.batch_size = 1; - expected_results = {{1}, {2, 22}, {3}, {1}}; - ParameterizeNumThreads(); - } - SUBCASE("tensor not optional -- expect parsing fail") - { - model_tensor2.is_optional_ = false; - expect_init_failure = true; - ParameterizeNumThreads(); - } - SUBCASE("shared memory not supported") - { - model_tensor2.is_optional_ = true; - params.shared_memory_type = SharedMemoryType::SYSTEM_SHARED_MEMORY; - // FIXME: TMA-765 - 
Shared memory mode does not support optional inputs, - // currently, and will be implemented in the associated story. - expect_init_failure = true; - ParameterizeNumThreads(); - } - SUBCASE("batching with mismatching data") - { - model_tensor2.is_optional_ = true; - params.batch_size = 2; - // For batch sizes larger than 1, the same set of inputs - // must be specified for each batch. You cannot use different - // set of optional inputs for each individual batch. - expect_init_failure = true; - ParameterizeNumThreads(); - } - - TestRequestRateManager trrm(params, is_sequence_model); - - tensors.push_back(model_tensor1); - tensors.push_back(model_tensor2); - - trrm.TestCustomData( - num_requests, num_threads, tensors, json_str, expected_results, - expect_init_failure, expect_thread_failure); -} - -TEST_CASE("custom_json_data: multiple streams") -{ - PerfAnalyzerParameters params{}; - params.user_data = {"fake_file.json"}; - params.num_of_sequences = 1; - bool is_sequence_model{false}; - - std::vector> expected_results; - std::vector tensors; - bool expect_init_failure = false; - bool expect_thread_failure = false; - - ModelTensor model_tensor1{}; - model_tensor1.datatype_ = "INT32"; - model_tensor1.is_optional_ = false; - model_tensor1.is_shape_tensor_ = false; - model_tensor1.name_ = "INPUT1"; - model_tensor1.shape_ = {1}; - - ModelTensor model_tensor2 = model_tensor1; - model_tensor2.name_ = "INPUT2"; - - std::string json_str{R"({ - "data": [[ - { "INPUT1": [1], "INPUT2": [21] }, - { "INPUT1": [2], "INPUT2": [22] }, - { "INPUT1": [3], "INPUT2": [23] } - ],[ - { "INPUT1": [201], "INPUT2": [221] }, - { "INPUT1": [202], "INPUT2": [222] } - ]]})"}; - - size_t num_requests = 10; - size_t num_threads = 1; - - const auto& ParameterizeMemory{[&]() { - SUBCASE("No shared memory") - { - params.shared_memory_type = NO_SHARED_MEMORY; - } - SUBCASE("system shared memory") - { - params.shared_memory_type = SYSTEM_SHARED_MEMORY; - } - SUBCASE("cuda shared memory") - { - params.shared_memory_type = CUDA_SHARED_MEMORY; - } - }}; - - const auto& ParameterizeNumThreads{[&]() { - SUBCASE("1 thread") - { - num_threads = 1; - ParameterizeMemory(); - } - SUBCASE("2 threads") - { - num_threads = 2; - ParameterizeMemory(); - } - }}; - - SUBCASE("yes sequence") - { - // Sequences will randomly pick among all streams - // (Although this test is hardcoded to pick ID 1 twice, and then ID 0 - // forever after) - is_sequence_model = true; - expected_results = {{201, 221}, {202, 222}, {201, 221}, {202, 222}, - {1, 21}, {2, 22}, {3, 23}, {1, 21}, - {2, 22}, {3, 23}}; - ParameterizeNumThreads(); - } - SUBCASE("no sequence") - { - // For the case of no sequences, only a single data stream is supported. 
The - // rest will be ignored - is_sequence_model = false; - expected_results = {{1, 21}, {2, 22}, {3, 23}, {1, 21}, {2, 22}, - {3, 23}, {1, 21}, {2, 22}, {3, 23}, {1, 21}}; - ParameterizeNumThreads(); - } - - TestRequestRateManager trrm(params, is_sequence_model); - - tensors.push_back(model_tensor1); - tensors.push_back(model_tensor2); - - trrm.CustomDataTestSetup(tensors, json_str, expect_init_failure, num_threads); - - if (is_sequence_model) { - // Force GetNewDataStreamId to return 1 twice and 0 every time after - EXPECT_CALL( - *std::dynamic_pointer_cast(trrm.sequence_manager_), - GetNewDataStreamId()) - .WillOnce(testing::Return(1)) - .WillOnce(testing::Return(1)) - .WillRepeatedly(testing::Return(0)); - } else { - // Expect that GetNewDataStreamId will never be called - EXPECT_CALL( - *std::dynamic_pointer_cast(trrm.sequence_manager_), - GetNewDataStreamId()) - .Times(0); - } - auto thread_status = - trrm.CustomDataTestSendRequests(num_requests, num_threads); - trrm.CustomDataTestCheckResults( - thread_status, expect_thread_failure, expected_results); -} - -/// Verify Shared Memory api calls -/// -TEST_CASE("Request rate - Shared memory methods") -{ - PerfAnalyzerParameters params; - bool is_sequence = false; - bool is_decoupled = false; - bool use_mock_infer = true; - - const std::string json_str{R"( - { - "data": [ - { - "INPUT0": [2123456789] - } - ] - } - )"}; - - - MockInputPipeline mip = TestLoadManagerBase::ProcessCustomJsonData(json_str); - - cb::MockClientStats::SharedMemoryStats expected_stats; - SUBCASE("System shared memory usage") - { - params.shared_memory_type = SYSTEM_SHARED_MEMORY; - TestRequestRateManager trrm( - params, is_sequence, is_decoupled, use_mock_infer); - - trrm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); - - trrm.parser_ = mip.mock_model_parser_; - trrm.data_loader_ = mip.mock_data_loader_; - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - expected_stats.num_unregister_all_shared_memory_calls = 1; - expected_stats.num_register_system_shared_memory_calls = 1; - expected_stats.num_create_shared_memory_region_calls = 1; - expected_stats.num_map_shared_memory_calls = 1; - trrm.CheckSharedMemory(expected_stats); - } - - SUBCASE("Cuda shared memory usage") - { - params.shared_memory_type = CUDA_SHARED_MEMORY; - TestRequestRateManager trrm( - params, is_sequence, is_decoupled, use_mock_infer); - - trrm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); - - trrm.parser_ = mip.mock_model_parser_; - trrm.data_loader_ = mip.mock_data_loader_; - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - expected_stats.num_unregister_all_shared_memory_calls = 1; - expected_stats.num_register_cuda_shared_memory_calls = 1; - 
trrm.CheckSharedMemory(expected_stats); - } - - SUBCASE("No shared memory usage") - { - params.shared_memory_type = NO_SHARED_MEMORY; - TestRequestRateManager trrm( - params, is_sequence, is_decoupled, use_mock_infer); - - trrm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); - - trrm.parser_ = mip.mock_model_parser_; - trrm.data_loader_ = mip.mock_data_loader_; - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - trrm.CheckSharedMemory(expected_stats); - } -} - -TEST_CASE("Request rate - Shared memory infer input calls") -{ - PerfAnalyzerParameters params{}; - bool is_sequence_model{false}; - - const auto& ParameterizeAsyncAndStreaming{[&]() { - SUBCASE("sync non-streaming") - { - params.async = false; - params.streaming = false; - } - SUBCASE("async non-streaming") - { - params.async = true; - params.streaming = false; - } - SUBCASE("async streaming") - { - params.async = true; - params.streaming = true; - } - }}; - - const auto& ParameterizeSequence{[&]() { - SUBCASE("non-sequence") - { - is_sequence_model = false; - ParameterizeAsyncAndStreaming(); - } - SUBCASE("sequence") - { - is_sequence_model = true; - params.num_of_sequences = 1; - ParameterizeAsyncAndStreaming(); - } - }}; - - const auto& ParameterizeMemory{[&]() { - SUBCASE("No shared memory") - { - params.shared_memory_type = NO_SHARED_MEMORY; - ParameterizeSequence(); - } - SUBCASE("system shared memory") - { - params.shared_memory_type = SYSTEM_SHARED_MEMORY; - ParameterizeSequence(); - } - SUBCASE("cuda shared memory") - { - params.shared_memory_type = CUDA_SHARED_MEMORY; - ParameterizeSequence(); - } - }}; - - ParameterizeMemory(); - TestRequestRateManager trrm(params, is_sequence_model); - - const std::string json_str{R"( - { - "data": [ - { - "INPUT0": [2000000000] - }, - { - "INPUT0": [2000000001] - } - ] - } - )"}; - MockInputPipeline mip = - TestLoadManagerBase::ProcessCustomJsonData(json_str, is_sequence_model); - - trrm.infer_data_manager_ = - MockInferDataManagerFactory::CreateMockInferDataManager( - params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, params.request_parameters, - mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); - - std::shared_ptr thread_stat{std::make_shared()}; - std::shared_ptr thread_config{ - std::make_shared(0)}; - - trrm.parser_ = mip.mock_model_parser_; - trrm.data_loader_ = mip.mock_data_loader_; - trrm.using_json_data_ = true; - trrm.execute_ = true; - trrm.batch_size_ = 1; - trrm.max_threads_ = 1; - - RateSchedulePtr_t schedule = std::make_shared(); - schedule->intervals = NanoIntervals{ - milliseconds(4), milliseconds(8), milliseconds(12), milliseconds(16)}; - schedule->duration = nanoseconds{16000000}; - - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - trrm.start_time_ = std::chrono::steady_clock::now(); - - std::shared_ptr worker{trrm.MakeWorker(thread_stat, thread_config)}; - 
std::dynamic_pointer_cast(worker)->SetSchedule(schedule); - std::future infer_future{std::async(&IWorker::Infer, worker)}; - - std::this_thread::sleep_for(milliseconds(18)); - - early_exit = true; - infer_future.get(); - - const auto& actual_append_raw_calls{trrm.stats_->num_append_raw_calls}; - const auto& actual_set_shared_memory_calls{ - trrm.stats_->num_set_shared_memory_calls}; - - if (params.shared_memory_type == NO_SHARED_MEMORY) { - CHECK(actual_append_raw_calls > 0); - CHECK(actual_set_shared_memory_calls == 0); - } else { - CHECK(actual_append_raw_calls == 0); - CHECK(actual_set_shared_memory_calls > 0); - } -} - -TEST_CASE("request_rate_deadlock") -{ - PerfAnalyzerParameters params{}; - params.max_concurrency = 6; - bool is_sequence_model{true}; - bool some_infer_failures{false}; - - const auto& ParameterizeSync{[&]() { - SUBCASE("sync") - { - params.async = false; - params.streaming = false; - } - SUBCASE("aync no streaming") - { - params.async = true; - params.streaming = false; - } - SUBCASE("async streaming") - { - params.async = true; - params.streaming = true; - } - }}; - - const auto& ParameterizeThreads{[&]() { - SUBCASE("2 thread") - { - ParameterizeSync(); - params.max_threads = 2; - } - SUBCASE("10 thread") - { - ParameterizeSync(); - params.max_threads = 10; - } - }}; - - const auto& ParameterizeSequence{[&]() { - SUBCASE("non-sequence") - { - ParameterizeThreads(); - is_sequence_model = false; - } - SUBCASE("sequence") - { - ParameterizeThreads(); - is_sequence_model = true; - params.num_of_sequences = 3; - } - }}; - - const auto& ParameterizeFailures{[&]() { - SUBCASE("yes_failures") - { - some_infer_failures = true; - ParameterizeSequence(); - } - SUBCASE("no_failures") - { - some_infer_failures = false; - ParameterizeSequence(); - } - }}; - - std::vector delays; - - const auto& ParameterizeDelays{[&]() { - SUBCASE("no_delay") - { - delays = {0}; - ParameterizeFailures(); - } - SUBCASE("random_delay") - { - delays = {1, 5, 20, 4, 3}; - ParameterizeFailures(); - } - }}; - - ParameterizeDelays(); - - TestRequestRateManager trrm(params, is_sequence_model); - trrm.stats_->SetDelays(delays); - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - // Sometimes have a request fail - if (some_infer_failures) { - trrm.stats_->SetReturnStatuses({true, true, true, false}); - } - - trrm.TestTimeouts(); -} - -TEST_CASE("request_rate_overhead") -{ - uint rate; - PerfAnalyzerParameters params{}; - SUBCASE("sync, rate 10") - { - params.async = false; - rate = 10; - } - SUBCASE("sync, rate 100") - { - params.async = false; - rate = 100; - } - SUBCASE("async, rate 10") - { - params.async = true; - rate = 10; - } - SUBCASE("async, rate 100") - { - params.async = true; - rate = 100; - } - TestRequestRateManager trrm(params, false); - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - trrm.TestOverhead(rate); -} - -std::chrono::steady_clock::time_point mk_start{}; - -TEST_CASE( - "send_request_rate_request_rate_manager: testing logic around detecting " - "send request count") -{ - PerfAnalyzerParameters params{}; - - std::vector delays; - bool is_sequence_model = false; - size_t rate = 
1000; - size_t time_ms = 50; - size_t expected_count = time_ms; - - SUBCASE("sync") - { - params.async = false; - delays = {0}; - } - SUBCASE("async - fast response") - { - params.async = true; - delays = {0}; - } - SUBCASE( - "async - slow response with sequences off should not slow down our send " - "rate") - { - params.async = true; - delays = {100}; - } - SUBCASE("async - slow response with sequences on") - { - is_sequence_model = true; - params.async = true; - params.num_of_sequences = 5; - delays = {100}; - - SUBCASE("send rate can be limited if serial sequences is on") - { - params.serial_sequences = true; - expected_count = params.num_of_sequences; - } - SUBCASE( - "send rate will not be affected by response time if serial sequences " - "is off") - { - params.serial_sequences = false; - } - } - - TestRequestRateManager trrm(params, is_sequence_model); - - trrm.stats_->SetDelays(delays); - - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - - trrm.ChangeRequestRate(rate); - std::this_thread::sleep_for(std::chrono::milliseconds(time_ms)); - const size_t num_sent_requests{trrm.GetAndResetNumSentRequests()}; - CHECK(num_sent_requests == doctest::Approx(expected_count).epsilon(0.1)); - - trrm.StopWorkerThreads(); -} - -TEST_CASE("request rate manager - Configure threads") -{ - PerfAnalyzerParameters params{}; - std::vector expected_config_values; - std::vector expected_number_of_sequences_owned_by_thread; - std::vector expected_seq_stat_index_offsets; - std::vector expected_num_requests; - bool is_sequence_model = true; - bool is_decoupled_model = false; - bool use_mock_infer = true; - size_t target_num_requests = 0; - - SUBCASE("normal") - { - params.max_threads = 4; - params.num_of_sequences = 4; - target_num_requests = 0; - - expected_number_of_sequences_owned_by_thread = {1, 1, 1, 1}; - expected_seq_stat_index_offsets = {0, 1, 2, 3}; - expected_num_requests = {0, 0, 0, 0}; - } - - SUBCASE("max_threads > num_seqs") - { - params.max_threads = 10; - params.num_of_sequences = 4; - target_num_requests = 8; - - expected_number_of_sequences_owned_by_thread = {1, 1, 1, 1}; - expected_seq_stat_index_offsets = {0, 1, 2, 3}; - expected_num_requests = {2, 2, 2, 2}; - } - - SUBCASE("num_seqs > max_threads") - { - params.max_threads = 4; - params.num_of_sequences = 10; - target_num_requests = 20; - - expected_number_of_sequences_owned_by_thread = {3, 3, 2, 2}; - expected_seq_stat_index_offsets = {0, 3, 6, 8}; - expected_num_requests = {5, 5, 5, 5}; - } - - SUBCASE("not divisible") - { - params.max_threads = 4; - params.num_of_sequences = 7; - target_num_requests = 13; - - expected_number_of_sequences_owned_by_thread = {2, 2, 2, 1}; - expected_seq_stat_index_offsets = {0, 2, 4, 6}; - expected_num_requests = {4, 3, 3, 3}; - } - - for (auto i = 0; i < expected_number_of_sequences_owned_by_thread.size(); - i++) { - ThreadConfig tc(i); - tc.num_sequences_ = expected_number_of_sequences_owned_by_thread[i]; - tc.seq_stat_index_offset_ = expected_seq_stat_index_offsets[i]; - tc.num_requests_ = expected_num_requests[i]; - - expected_config_values.push_back(tc); - } - TestRequestRateManager trrm( - params, is_sequence_model, is_decoupled_model, use_mock_infer); - trrm.TestConfigureThreads(expected_config_values, target_num_requests); -} - -TEST_CASE("request rate manager - Calculate thread ids") -{ - 
PerfAnalyzerParameters params{}; - bool is_sequence_model; - bool is_decoupled_model = false; - bool use_mock_infer = true; - std::vector expected_thread_ids; - - SUBCASE("normal, on sequence model") - { - is_sequence_model = true; - params.max_threads = 4; - params.num_of_sequences = 4; - expected_thread_ids = {0, 1, 2, 3}; - } - SUBCASE("normal, not sequence model") - { - is_sequence_model = false; - params.max_threads = 4; - params.num_of_sequences = 4; - expected_thread_ids = {0, 1, 2, 3}; - } - SUBCASE("num_seq > max_threads, on sequence model") - { - is_sequence_model = true; - params.max_threads = 4; - params.num_of_sequences = 5; - expected_thread_ids = {0, 1, 2, 3, 0}; - } - SUBCASE("num_seq > max_threads, not sequence model") - { - is_sequence_model = false; - params.max_threads = 4; - params.num_of_sequences = 5; - expected_thread_ids = {0, 1, 2, 3}; - } - SUBCASE("max_threads > num_seq, on sequence model") - { - is_sequence_model = true; - params.max_threads = 5; - params.num_of_sequences = 4; - expected_thread_ids = {0, 1, 2, 3}; - } - SUBCASE("max_threads > num_seq, not sequence model") - { - is_sequence_model = false; - params.max_threads = 5; - params.num_of_sequences = 4; - expected_thread_ids = {0, 1, 2, 3, 4}; - } - SUBCASE("large example") - { - is_sequence_model = true; - params.max_threads = 4; - params.num_of_sequences = 7; - expected_thread_ids = {0, 1, 2, 3, 0, 1, 2}; - } - - TestRequestRateManager trrm( - params, is_sequence_model, is_decoupled_model, use_mock_infer); - trrm.TestCalculateThreadIds(expected_thread_ids); -} - -TEST_CASE("request rate create schedule") -{ - PerfAnalyzerParameters params; - params.measurement_window_ms = 1000; - params.max_trials = 10; - bool is_sequence_model = false; - bool is_decoupled = false; - bool use_mock_infer = false; - double rate = 10; - std::vector expected_worker_ratio; - - SUBCASE("num_seq > max_threads, on sequence model, CONSTANT") - { - is_sequence_model = true; - params.max_threads = 4; - params.num_of_sequences = 5; - expected_worker_ratio = {2, 1, 1, 1}; - } - - SUBCASE("num_seq = 7, max_threads = 4, on sequence model, CONSTANT") - { - is_sequence_model = true; - params.max_threads = 4; - params.num_of_sequences = 7; - expected_worker_ratio = {2, 2, 2, 1}; - } - - SUBCASE("num_seq = 4, max_threads = 2, on sequence model, CONSTANT") - { - is_sequence_model = true; - params.max_threads = 2; - params.num_of_sequences = 4; - expected_worker_ratio = {1, 1}; - } - - SUBCASE("num_seq > max_threads, on sequence model, POISSON") - { - is_sequence_model = true; - params.max_threads = 4; - params.num_of_sequences = 5; - expected_worker_ratio = {2, 1, 1, 1}; - params.request_distribution = POISSON; - } - - TestRequestRateManager trrm( - params, is_sequence_model, is_decoupled, use_mock_infer); - - trrm.InitManager( - params.string_length, params.string_data, params.zero_input, - params.user_data, params.start_sequence_id, params.sequence_id_range, - params.sequence_length, params.sequence_length_specified, - params.sequence_length_variation); - trrm.TestCreateSchedule(rate, params, expected_worker_ratio); -} -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_sequence_manager.cc b/src/c++/perf_analyzer/test_sequence_manager.cc deleted file mode 100644 index 243500b85..000000000 --- a/src/c++/perf_analyzer/test_sequence_manager.cc +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "doctest.h" -#include "mock_data_loader.h" -#include "mock_sequence_manager.h" -#include "sequence_manager.h" - -namespace triton { namespace perfanalyzer { - -TEST_CASE("get_sequence_id: testing the GetSequenceID function") -{ - MockSequenceManager msm{}; - - std::shared_ptr sequence_status{ - std::make_shared(5)}; - - msm.sequence_statuses_.push_back(sequence_status); - - CHECK(msm.GetSequenceID(0) == 5); -} - -TEST_CASE( - "test_set_infer_sequence_options: testing the SetInferSequenceOptions " - "function") -{ - const uint64_t seq_id{5}; - std::vector> sequence_statuses{ - std::make_shared(seq_id)}; - std::uniform_int_distribution distribution(0, 0); - const uint64_t start_sequence_id{1}; - const uint64_t sequence_id_range{UINT32_MAX}; - const size_t sequence_length{20}; - const bool sequence_length_specified{false}; - const double sequence_length_variation{0.0}; - bool using_json_data{false}; - std::shared_ptr data_loader{ - std::make_shared()}; - const uint32_t seq_stat_index{0}; - const std::string model_name{"model"}; - std::unique_ptr options{ - std::make_unique(model_name)}; - - SUBCASE("start false, end false") - { - sequence_statuses[seq_stat_index]->remaining_queries_ = 2; - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 5; - - msm.SetInferSequenceOptions(seq_stat_index, options); - - CHECK(options->sequence_start_ == false); - CHECK(options->sequence_id_ == 5); - CHECK(options->sequence_end_ == false); - } - SUBCASE("start true, end false") - { - sequence_statuses[seq_stat_index]->remaining_queries_ = 0; - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 5; - - msm.SetInferSequenceOptions(seq_stat_index, options); - - 
CHECK(options->sequence_start_ == true); - CHECK(options->sequence_id_ == 6); - CHECK(options->sequence_end_ == false); - } - SUBCASE("start false, end true") - { - sequence_statuses[seq_stat_index]->remaining_queries_ = 1; - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 5; - - msm.SetInferSequenceOptions(seq_stat_index, options); - - CHECK(options->sequence_start_ == false); - CHECK(options->sequence_id_ == 5); - CHECK(options->sequence_end_ == true); - } - SUBCASE("start true, end true") - { - sequence_statuses[seq_stat_index]->remaining_queries_ = 0; - using_json_data = true; - data_loader->step_num_.push_back(1); - data_loader->data_stream_cnt_ = 1; - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 5; - - msm.SetInferSequenceOptions(seq_stat_index, options); - - CHECK(options->sequence_start_ == true); - CHECK(options->sequence_id_ == 6); - CHECK(options->sequence_end_ == true); - } -} - -TEST_CASE("init_new_sequence: testing the InitNewSequence function") -{ - const uint64_t seq_id{5}; - std::vector> sequence_statuses{ - std::make_shared(seq_id)}; - std::uniform_int_distribution distribution(0, 0); - const uint64_t start_sequence_id{1}; - const uint64_t sequence_id_range{UINT32_MAX}; - size_t sequence_length{20}; - bool sequence_length_specified{false}; - const double sequence_length_variation{0.0}; - bool using_json_data{false}; - std::shared_ptr data_loader{ - std::make_shared()}; - int seq_stat_index{0}; - size_t expected_sequence_length{0}; - - SUBCASE("not using json data") - { - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 5; - - msm.InitNewSequence(seq_stat_index); - - CHECK(msm.sequence_statuses_[seq_stat_index]->seq_id_ == 6); - CHECK(msm.sequence_statuses_[seq_stat_index]->remaining_queries_ > 0); - } - - SUBCASE("using json data") - { - using_json_data = true; - data_loader->step_num_.push_back(5); - data_loader->data_stream_cnt_ = 1; - - SUBCASE("sequence length not specified") - { - sequence_length_specified = false; - expected_sequence_length = 5; - } - - SUBCASE("sequence length specified, smaller than input data") - { - sequence_length_specified = true; - sequence_length = 4; - expected_sequence_length = 4; - } - - SUBCASE("sequence length specified, larger than input data") - { - sequence_length_specified = true; - sequence_length = 6; - expected_sequence_length = 6; - } - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 5; - - msm.InitNewSequence(seq_stat_index); - - CHECK(msm.sequence_statuses_[seq_stat_index]->seq_id_ == 6); - CHECK( - msm.sequence_statuses_[seq_stat_index]->remaining_queries_ == - expected_sequence_length); - CHECK( - msm.sequence_statuses_[seq_stat_index]->sequence_length_ == - expected_sequence_length); - } -} - -TEST_CASE("get_next_seq_id: testing the GetNextSeqId function") -{ - 
std::vector> sequence_statuses{}; - std::uniform_int_distribution distribution(0, 0); - uint64_t start_sequence_id{0}; - uint64_t sequence_id_range{0}; - const size_t sequence_length{20}; - const bool sequence_length_specified{false}; - const double sequence_length_variation{0.0}; - const bool using_json_data{false}; - std::shared_ptr data_loader{ - std::make_shared()}; - int seq_stat_index{0}; - - SUBCASE("next sequence id not in use") - { - sequence_statuses.push_back(std::make_shared(1)); - start_sequence_id = 1; - sequence_id_range = 2; - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 3; - - uint64_t result{msm.GetNextSeqId(seq_stat_index)}; - - CHECK(result == 2); - } - - SUBCASE("next sequence id in use") - { - sequence_statuses.push_back(std::make_shared(1)); - sequence_statuses.push_back(std::make_shared(2)); - start_sequence_id = 1; - sequence_id_range = 2; - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 3; - - uint64_t result{msm.GetNextSeqId(seq_stat_index)}; - - CHECK(result == 1); - } -} - -TEST_CASE( - "get_random_sequence_length: testing the GetRandomSequenceLength function") -{ - std::vector> sequence_statuses{}; - std::uniform_int_distribution distribution(0, 0); - const uint64_t start_sequence_id{0}; - const uint64_t sequence_id_range{0}; - size_t sequence_length{20}; - const bool sequence_length_specified{false}; - const double sequence_length_variation{0.0}; - const bool using_json_data{false}; - std::shared_ptr data_loader{ - std::make_shared()}; - int seq_stat_index{0}; - double offset_ratio{0.2}; - - MockSequenceManager msm( - start_sequence_id, sequence_id_range, sequence_length, - sequence_length_specified, sequence_length_variation, using_json_data, - data_loader); - msm.sequence_statuses_ = sequence_statuses; - msm.curr_seq_id_ = 3; - - uint64_t result{msm.GetRandomSequenceLength(offset_ratio)}; - - CHECK(result >= 16); - CHECK(result <= 24); -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_utils.h b/src/c++/perf_analyzer/test_utils.h deleted file mode 100644 index 168aba71a..000000000 --- a/src/c++/perf_analyzer/test_utils.h +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace triton { namespace perfanalyzer { - -/// This class will create a thread that will raise an error after a fixed -/// amount of time, unless the stop function is called. -/// -/// It can be used to detect livelock/deadlock cases in tests so that the test -/// will be guaranteed to finish instead of hang -/// -class TestWatchDog { - public: - /// Create the watchdog - /// - /// @param max_time_ms How long (in milliseconds) until this watchdog will - /// raise an error - TestWatchDog(unsigned int max_time_ms) { start(max_time_ms); } - - /// Stop the watchdog so that it will not raise any errors - /// - void stop() - { - running_ = false; - thread_.join(); - } - - private: - uint sleep_interval_ms{40}; - uint max_time_ms_; - std::atomic timer_; - std::atomic running_; - std::thread thread_; - - void start(unsigned int max_time_ms) - { - max_time_ms_ = max_time_ms; - timer_ = 0; - running_ = true; - thread_ = std::thread(&TestWatchDog::loop, this); - } - - void loop() - { - while (running_) { - if (timer_ >= max_time_ms_) { - running_ = false; - REQUIRE_MESSAGE(false, "WATCHDOG TIMEOUT!"); - } - - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_interval_ms)); - timer_ += sleep_interval_ms; - } - } -}; - -/// Calculate the average of a vector of integers -/// -static double -CalculateAverage(const std::vector& values) -{ - double avg = - std::accumulate(values.begin(), values.end(), 0.0) / values.size(); - return avg; -} - -/// Calculate the variance of a vector of integers -/// -static double -CalculateVariance(const std::vector& values, double average) -{ - double tmp = 0; - for (auto value : values) { - tmp += (value - average) * (value - average) / values.size(); - } - double variance = std::sqrt(tmp); - return variance; -} - -}} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/thread_config.h b/src/c++/perf_analyzer/thread_config.h deleted file mode 100644 index 4c4845a6e..000000000 --- a/src/c++/perf_analyzer/thread_config.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -namespace triton { namespace perfanalyzer { - -// Holds the configuration for a worker thread -struct ThreadConfig { - ThreadConfig(size_t thread_id) : thread_id_(thread_id) {} - - // ID of corresponding worker thread - size_t thread_id_{0}; - - // The concurrency level that the worker should produce - // TPA-69: This is only used in concurrency mode and shouldn't be visible in - // other modes - size_t concurrency_{0}; - - // The number of sequences owned by this worker - // TPA-69: This is only used in request-rate mode and shouldn't be visible in - // other modes - uint32_t num_sequences_{1}; - - // How many requests to generate before stopping. If 0, generate indefinitely - size_t num_requests_{0}; - - // The starting sequence stat index for this worker - size_t seq_stat_index_offset_{0}; - - // Whether or not the thread is issuing new inference requests - bool is_paused_{false}; -}; - - -}} // namespace triton::perfanalyzer diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index ecc33b84d..67975dd68 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -34,7 +34,6 @@ project(python-clients LANGUAGES C CXX) set(TRITON_VERSION "0.0.0" CACHE STRING "Version for the clients") option(TRITON_ENABLE_PYTHON_HTTP "Enable Python HTTP client libraries" OFF) option(TRITON_ENABLE_PYTHON_GRPC "Enable Python GRPC client libraries" OFF) -option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF) option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF) option(TRITON_ENABLE_TESTS "Include tests in build" OFF) option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)