diff --git a/src/c++/library/common.h b/src/c++/library/common.h
index ba98d82ca..9cf99c478 100644
--- a/src/c++/library/common.h
+++ b/src/c++/library/common.h
@@ -38,6 +38,7 @@
 #include
 #include
 #include
+#include <unordered_map>
 #include
 
 #ifdef TRITON_INFERENCE_SERVER_CLIENT_CLASS
@@ -153,6 +154,12 @@ class InferenceServerClient {
   InferStat infer_stat_;
 };
 
+struct RequestParameter {
+  std::string name;
+  std::string value;
+  std::string type;
+};
+
 //==============================================================================
 /// Structure to hold options for Inference Request.
 ///
@@ -221,6 +228,8 @@ struct InferOptions {
   uint64_t client_timeout_;
   /// Whether to tell Triton to enable an empty final response.
   bool triton_enable_empty_final_response_;
+  /// Additional parameters to pass to the model
+  std::unordered_map<std::string, RequestParameter> request_parameters;
 };
 
 //==============================================================================
diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc
index 537608fba..cc3a9a85f 100644
--- a/src/c++/library/grpc_client.cc
+++ b/src/c++/library/grpc_client.cc
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <unordered_map>
 
 #include "common.h"
 
@@ -1408,6 +1409,23 @@ InferenceServerGrpcClient::PreRunProcessing(
         options.server_timeout_);
   }
 
+
+  for (auto& param : options.request_parameters) {
+    if (param.second.type == "string") {
+      (*infer_request_.mutable_parameters())[param.first].set_string_param(
+          param.second.value);
+    } else if (param.second.type == "int") {
+      (*infer_request_.mutable_parameters())[param.first].set_int64_param(
+          std::stoll(param.second.value));
+    } else if (param.second.type == "bool") {
+      bool val = false;
+      if (param.second.value == "true") {
+        val = true;
+      }
+      (*infer_request_.mutable_parameters())[param.first].set_bool_param(val);
+    }
+  }
+
   int index = 0;
   infer_request_.mutable_raw_input_contents()->Clear();
   for (const auto input : inputs) {
diff --git a/src/c++/perf_analyzer/client_backend/client_backend.h
b/src/c++/perf_analyzer/client_backend/client_backend.h index dd632e67e..870ea3dd5 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.h +++ b/src/c++/perf_analyzer/client_backend/client_backend.h @@ -192,6 +192,15 @@ struct ModelStatistics { uint64_t cache_miss_time_ns_; }; +/// +/// Structure to hold Request parameter data for Inference Request. +/// +struct RequestParameter { + std::string name; + std::string value; + std::string type; +}; + //============================================================================== /// Structure to hold options for Inference Request. /// @@ -230,6 +239,9 @@ struct InferOptions { bool sequence_end_; /// Whether to tell Triton to enable an empty final response. bool triton_enable_empty_final_response_; + + /// Additional parameters to pass to the model + std::unordered_map request_parameters_; }; struct SslOptionsBase { diff --git a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc index 1be578a95..70de5f52b 100644 --- a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc @@ -590,6 +590,14 @@ TritonClientBackend::ParseInferOptionsToTriton( } triton_options->triton_enable_empty_final_response_ = options.triton_enable_empty_final_response_; + + for (auto& map_entry : options.request_parameters_) { + auto rp = tc::RequestParameter(); + rp.name = map_entry.second.name; + rp.value = map_entry.second.value; + rp.type = map_entry.second.type; + triton_options->request_parameters[map_entry.first] = rp; + } } diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 5a73d5927..b6b2194d6 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -1601,21 +1601,10 @@ CLParser::ParseCommandLine(int argc, char** argv) std::string 
value{values[1]}; std::string type{values[2]}; - RequestParameter param; - if (type == "bool") { - param.type = RequestParameterType::BOOL; - param.bool_value = value == "true" ? true : false; - } else if (type == "int") { - param.type = RequestParameterType::INT; - param.int_value = std::stoll(value); - } else if (type == "string") { - param.type = RequestParameterType::STRING; - param.str_value = value; - } else { - Usage( - "Failed to parse --request-parameter. Unsupported type: '" + - type + "'."); - } + cb::RequestParameter param; + param.name = name; + param.value = value; + param.type = type; params_->request_parameters[name] = param; break; } diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h index 518e7b2cf..9ff4869ff 100644 --- a/src/c++/perf_analyzer/command_line_parser.h +++ b/src/c++/perf_analyzer/command_line_parser.h @@ -58,7 +58,7 @@ struct PerfAnalyzerParameters { uint64_t measurement_window_ms = 5000; bool using_concurrency_range = false; Range concurrency_range{1, 1, 1}; - std::unordered_map request_parameters; + std::unordered_map request_parameters; uint64_t latency_threshold_ms = NO_LIMIT; double stability_threshold = 0.1; size_t max_trials = 10; diff --git a/src/c++/perf_analyzer/concurrency_manager.cc b/src/c++/perf_analyzer/concurrency_manager.cc index 7489d95e0..a64062cc0 100644 --- a/src/c++/perf_analyzer/concurrency_manager.cc +++ b/src/c++/perf_analyzer/concurrency_manager.cc @@ -44,11 +44,14 @@ ConcurrencyManager::Create( const SharedMemoryType shared_memory_type, const size_t output_shm_size, const std::shared_ptr& parser, const std::shared_ptr& factory, - std::unique_ptr* manager) + std::unique_ptr* manager, + const std::unordered_map& + request_parameters) { std::unique_ptr local_manager(new ConcurrencyManager( async, streaming, batch_size, max_threads, max_concurrency, - shared_memory_type, output_shm_size, parser, factory)); + shared_memory_type, output_shm_size, parser, factory, 
+ request_parameters)); *manager = std::move(local_manager); @@ -60,10 +63,12 @@ ConcurrencyManager::ConcurrencyManager( const size_t max_threads, const size_t max_concurrency, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const std::shared_ptr& parser, - const std::shared_ptr& factory) + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters) : LoadManager( async, streaming, batch_size, max_threads, shared_memory_type, - output_shm_size, parser, factory), + output_shm_size, parser, factory, request_parameters), execute_(true), max_concurrency_(max_concurrency) { threads_config_.reserve(max_threads); diff --git a/src/c++/perf_analyzer/concurrency_manager.h b/src/c++/perf_analyzer/concurrency_manager.h index 15e211ca1..513d7396c 100644 --- a/src/c++/perf_analyzer/concurrency_manager.h +++ b/src/c++/perf_analyzer/concurrency_manager.h @@ -74,6 +74,7 @@ class ConcurrencyManager : public LoadManager { /// \param factory The ClientBackendFactory object used to create /// client to the server. /// \param manager Returns a new ConcurrencyManager object. + /// \param request_parameters Custom request parameters to send to the server /// \return cb::Error object indicating success or failure. 
static cb::Error Create( const bool async, const bool streaming, const int32_t batch_size, @@ -81,7 +82,9 @@ class ConcurrencyManager : public LoadManager { const SharedMemoryType shared_memory_type, const size_t output_shm_size, const std::shared_ptr& parser, const std::shared_ptr& factory, - std::unique_ptr* manager); + std::unique_ptr* manager, + const std::unordered_map& + request_parameters); /// Adjusts the number of concurrent requests to be the same as /// 'concurrent_request_count' (by creating or pausing threads) @@ -100,7 +103,9 @@ class ConcurrencyManager : public LoadManager { const size_t max_threads, const size_t max_concurrency, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const std::shared_ptr& parser, - const std::shared_ptr& factory); + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters); // The number of worker threads with non-zero concurrencies size_t active_threads_; diff --git a/src/c++/perf_analyzer/custom_load_manager.cc b/src/c++/perf_analyzer/custom_load_manager.cc index 2dddf3ab2..32e5693b0 100644 --- a/src/c++/perf_analyzer/custom_load_manager.cc +++ b/src/c++/perf_analyzer/custom_load_manager.cc @@ -41,12 +41,15 @@ CustomLoadManager::Create( const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, const std::shared_ptr& factory, - std::unique_ptr* manager) + std::unique_ptr* manager, + const std::unordered_map& + request_parameters) { std::unique_ptr local_manager(new CustomLoadManager( async, streaming, request_intervals_file, batch_size, measurement_window_ms, max_trials, max_threads, num_of_sequences, - shared_memory_type, output_shm_size, serial_sequences, parser, factory)); + shared_memory_type, output_shm_size, serial_sequences, parser, factory, + request_parameters)); *manager = std::move(local_manager); @@ -60,12 +63,14 @@ CustomLoadManager::CustomLoadManager( const size_t max_threads, const 
uint32_t num_of_sequences, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory) + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters) : RequestRateManager( async, streaming, Distribution::CUSTOM, batch_size, measurement_window_ms, max_trials, max_threads, num_of_sequences, shared_memory_type, output_shm_size, serial_sequences, parser, - factory), + factory, request_parameters), request_intervals_file_(request_intervals_file) { } diff --git a/src/c++/perf_analyzer/custom_load_manager.h b/src/c++/perf_analyzer/custom_load_manager.h index 6b61de2f0..c762e9c7e 100644 --- a/src/c++/perf_analyzer/custom_load_manager.h +++ b/src/c++/perf_analyzer/custom_load_manager.h @@ -72,6 +72,7 @@ class CustomLoadManager : public RequestRateManager { /// \param factory The ClientBackendFactory object used to create /// client to the server. /// \param manager Returns a new ConcurrencyManager object. + /// \param request_parameters Custom request parameters to send to the server /// \return cb::Error object indicating success or failure. 
static cb::Error Create( const bool async, const bool streaming, @@ -81,7 +82,9 @@ class CustomLoadManager : public RequestRateManager { const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, const std::shared_ptr& factory, - std::unique_ptr* manager); + std::unique_ptr* manager, + const std::unordered_map& + request_parameter); /// Initializes the load manager with the provided file containing request /// intervals @@ -103,7 +106,9 @@ class CustomLoadManager : public RequestRateManager { const size_t max_threads, const uint32_t num_of_sequences, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory); + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters); cb::Error GenerateSchedule(); diff --git a/src/c++/perf_analyzer/infer_data_manager.cc b/src/c++/perf_analyzer/infer_data_manager.cc index 99a3bcb08..8b89ea806 100644 --- a/src/c++/perf_analyzer/infer_data_manager.cc +++ b/src/c++/perf_analyzer/infer_data_manager.cc @@ -167,6 +167,9 @@ InferDataManager::InitInferDataInput( infer_input->AppendRaw(input_data.data_ptr, input_data.batch1_size)); } } + + AddInferDataParameters(infer_data); + return cb::Error::Success; } diff --git a/src/c++/perf_analyzer/infer_data_manager.h b/src/c++/perf_analyzer/infer_data_manager.h index dd1973c2d..4b41cc776 100644 --- a/src/c++/perf_analyzer/infer_data_manager.h +++ b/src/c++/perf_analyzer/infer_data_manager.h @@ -41,11 +41,14 @@ class InferDataManager : public InferDataManagerBase { public: InferDataManager( const size_t max_threads, const int32_t batch_size, + const std::unordered_map& + request_parameters, const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) : max_threads_(max_threads), - InferDataManagerBase(batch_size, parser, factory, data_loader) + 
InferDataManagerBase( + batch_size, request_parameters, parser, factory, data_loader) { } diff --git a/src/c++/perf_analyzer/infer_data_manager_base.cc b/src/c++/perf_analyzer/infer_data_manager_base.cc index 08bd19588..f21fbbc00 100644 --- a/src/c++/perf_analyzer/infer_data_manager_base.cc +++ b/src/c++/perf_analyzer/infer_data_manager_base.cc @@ -179,5 +179,10 @@ InferDataManagerBase::CreateInferInput( return cb::InferInput::Create(infer_input, kind, name, dims, datatype); } +void +InferDataManagerBase::AddInferDataParameters(InferData& infer_data) +{ + infer_data.options_->request_parameters_ = request_parameters_; +} }} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/infer_data_manager_base.h b/src/c++/perf_analyzer/infer_data_manager_base.h index 7ff6a07c5..d0ae2ab16 100644 --- a/src/c++/perf_analyzer/infer_data_manager_base.h +++ b/src/c++/perf_analyzer/infer_data_manager_base.h @@ -41,11 +41,15 @@ namespace triton { namespace perfanalyzer { class InferDataManagerBase : public IInferDataManager { public: InferDataManagerBase( - const int32_t batch_size, const std::shared_ptr& parser, + const int32_t batch_size, + const std::unordered_map& + request_parameters, + const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) - : batch_size_(batch_size), parser_(parser), factory_(factory), - data_loader_(data_loader), backend_kind_(factory->Kind()) + : batch_size_(batch_size), request_parameters_(request_parameters), + parser_(parser), factory_(factory), data_loader_(data_loader), + backend_kind_(factory->Kind()) { } @@ -72,6 +76,7 @@ class InferDataManagerBase : public IInferDataManager { std::shared_ptr data_loader_; std::unique_ptr backend_; cb::BackendKind backend_kind_; + std::unordered_map request_parameters_; /// Gets the input data for the specified input for the specified batch size /// @@ -135,6 +140,8 @@ class InferDataManagerBase : public IInferDataManager { virtual cb::Error 
InitInferDataOutput( const std::string& name, InferData& infer_data) = 0; + void AddInferDataParameters(InferData& infer_data); + #ifndef DOCTEST_CONFIG_DISABLE public: InferDataManagerBase() = default; diff --git a/src/c++/perf_analyzer/infer_data_manager_factory.h b/src/c++/perf_analyzer/infer_data_manager_factory.h index 58eab717d..6bf24bef8 100644 --- a/src/c++/perf_analyzer/infer_data_manager_factory.h +++ b/src/c++/perf_analyzer/infer_data_manager_factory.h @@ -30,6 +30,7 @@ #include "infer_data_manager.h" #include "infer_data_manager_shm.h" #include "model_parser.h" +#include "perf_utils.h" namespace triton { namespace perfanalyzer { @@ -38,40 +39,49 @@ class InferDataManagerFactory { static std::shared_ptr CreateInferDataManager( const size_t max_threads, const int32_t batch_size, const SharedMemoryType shared_memory_type, const size_t output_shm_size, + const std::unordered_map& + request_parameters, const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) { if (shared_memory_type == SharedMemoryType::NO_SHARED_MEMORY) { return CreateInferDataManagerNoShm( - max_threads, batch_size, parser, factory, data_loader); + max_threads, batch_size, request_parameters, parser, factory, + data_loader); } else { return CreateInferDataManagerShm( - batch_size, shared_memory_type, output_shm_size, parser, factory, - data_loader); + batch_size, shared_memory_type, output_shm_size, request_parameters, + parser, factory, data_loader); } } private: static std::shared_ptr CreateInferDataManagerNoShm( const size_t max_threads, const int32_t batch_size, + const std::unordered_map& + request_parameters, const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) { return std::make_shared( - max_threads, batch_size, parser, factory, data_loader); + max_threads, batch_size, request_parameters, parser, factory, + data_loader); } static std::shared_ptr CreateInferDataManagerShm( const int32_t 
batch_size, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, const std::shared_ptr& parser, + const size_t output_shm_size, + const std::unordered_map& + request_parameters, + const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) { return std::make_shared( - batch_size, shared_memory_type, output_shm_size, parser, factory, - data_loader); + batch_size, shared_memory_type, output_shm_size, request_parameters, + parser, factory, data_loader); } }; diff --git a/src/c++/perf_analyzer/infer_data_manager_shm.cc b/src/c++/perf_analyzer/infer_data_manager_shm.cc index 91d691830..6ca4a87ff 100644 --- a/src/c++/perf_analyzer/infer_data_manager_shm.cc +++ b/src/c++/perf_analyzer/infer_data_manager_shm.cc @@ -331,6 +331,8 @@ InferDataManagerShm::InitInferDataInput( RETURN_IF_ERROR(infer_input->SetSharedMemory( region_name, shared_memory_regions_[region_name].byte_size_)); + AddInferDataParameters(infer_data); + return cb::Error::Success; } diff --git a/src/c++/perf_analyzer/infer_data_manager_shm.h b/src/c++/perf_analyzer/infer_data_manager_shm.h index 14d1fd2ee..b684544c5 100644 --- a/src/c++/perf_analyzer/infer_data_manager_shm.h +++ b/src/c++/perf_analyzer/infer_data_manager_shm.h @@ -94,12 +94,16 @@ class InferDataManagerShm : public InferDataManagerBase { public: InferDataManagerShm( const int32_t batch_size, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, const std::shared_ptr& parser, + const size_t output_shm_size, + const std::unordered_map& + request_parameters, + const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) : shared_memory_type_(shared_memory_type), output_shm_size_(output_shm_size), - InferDataManagerBase(batch_size, parser, factory, data_loader) + InferDataManagerBase( + batch_size, request_parameters, parser, factory, data_loader) { } diff --git a/src/c++/perf_analyzer/load_manager.cc 
b/src/c++/perf_analyzer/load_manager.cc index 369317e66..ac9150a9d 100644 --- a/src/c++/perf_analyzer/load_manager.cc +++ b/src/c++/perf_analyzer/load_manager.cc @@ -160,7 +160,9 @@ LoadManager::LoadManager( const bool async, const bool streaming, const int32_t batch_size, const size_t max_threads, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const std::shared_ptr& parser, - const std::shared_ptr& factory) + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters) : async_(async), streaming_(streaming), batch_size_(batch_size), max_threads_(max_threads), parser_(parser), factory_(factory), using_json_data_(false) @@ -172,8 +174,8 @@ LoadManager::LoadManager( data_loader_.reset(new DataLoader(batch_size_)); infer_data_manager_ = InferDataManagerFactory::CreateInferDataManager( - max_threads, batch_size, shared_memory_type, output_shm_size, parser, - factory, data_loader_); + max_threads, batch_size, shared_memory_type, output_shm_size, + request_parameters, parser, factory, data_loader_); } void diff --git a/src/c++/perf_analyzer/load_manager.h b/src/c++/perf_analyzer/load_manager.h index 5e75ab9ea..799bfa75f 100644 --- a/src/c++/perf_analyzer/load_manager.h +++ b/src/c++/perf_analyzer/load_manager.h @@ -112,7 +112,9 @@ class LoadManager { const bool async, const bool streaming, const int32_t batch_size, const size_t max_threads, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const std::shared_ptr& parser, - const std::shared_ptr& factory); + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters); /// Complete any subclass-specific manager initialization tasks. 
virtual void InitManagerFinalize() {} diff --git a/src/c++/perf_analyzer/mock_infer_data_manager.h b/src/c++/perf_analyzer/mock_infer_data_manager.h index f001c20ad..8f9cd7ec0 100644 --- a/src/c++/perf_analyzer/mock_infer_data_manager.h +++ b/src/c++/perf_analyzer/mock_infer_data_manager.h @@ -37,12 +37,15 @@ class MockInferDataManagerShm : public InferDataManagerShm { public: MockInferDataManagerShm( const int32_t batch_size, const SharedMemoryType shared_memory_type, - const size_t output_shm_size, const std::shared_ptr& parser, + const size_t output_shm_size, + std::unordered_map + request_parameters, + const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) : InferDataManagerShm( - batch_size, shared_memory_type, output_shm_size, parser, factory, - data_loader) + batch_size, shared_memory_type, output_shm_size, request_parameters, + parser, factory, data_loader) { } @@ -83,10 +86,14 @@ class MockInferDataManager : public InferDataManager { MockInferDataManager( const size_t max_threads, const int32_t batch_size, + std::unordered_map + request_parameters, const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) - : InferDataManager(max_threads, batch_size, parser, factory, data_loader) + : InferDataManager( + max_threads, batch_size, request_parameters, parser, factory, + data_loader) { SetupMocks(); } @@ -122,17 +129,20 @@ class MockInferDataManagerFactory { static std::shared_ptr CreateMockInferDataManager( const size_t max_threads, const int32_t batch_size, const SharedMemoryType shared_memory_type, const size_t output_shm_size, + std::unordered_map + request_parameters, const std::shared_ptr& parser, const std::shared_ptr& factory, const std::shared_ptr& data_loader) { if (shared_memory_type == SharedMemoryType::NO_SHARED_MEMORY) { return std::make_shared>( - max_threads, batch_size, parser, factory, data_loader); + max_threads, batch_size, request_parameters, parser, 
factory, + data_loader); } else { return std::make_shared>( - batch_size, shared_memory_type, output_shm_size, parser, factory, - data_loader); + batch_size, shared_memory_type, output_shm_size, request_parameters, + parser, factory, data_loader); } } }; diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index c3e5e5f90..46b665757 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -206,7 +206,7 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->async, params_->streaming, params_->batch_size, params_->max_threads, params_->max_concurrency, params_->shared_memory_type, params_->output_shm_size, parser_, - factory, &manager), + factory, &manager, params_->request_parameters), "failed to create concurrency manager"); } else if (params_->is_using_periodic_concurrency_mode) { @@ -214,7 +214,8 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->async, params_->streaming, params_->batch_size, params_->max_threads, params_->max_concurrency, params_->shared_memory_type, params_->output_shm_size, parser_, factory, - params_->periodic_concurrency_range, params_->request_period); + params_->periodic_concurrency_range, params_->request_period, + params_->request_parameters); } else if (params_->using_request_rate_range) { if ((params_->sequence_id_range != 0) && (params_->sequence_id_range < params_->num_of_sequences)) { @@ -231,7 +232,7 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->batch_size, params_->max_threads, params_->num_of_sequences, params_->shared_memory_type, params_->output_shm_size, params_->serial_sequences, parser_, - factory, &manager), + factory, &manager, params_->request_parameters), "failed to create request rate manager"); } else { @@ -250,7 +251,7 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->batch_size, params_->max_threads, params_->num_of_sequences, params_->shared_memory_type, params_->output_shm_size, params_->serial_sequences, parser_, - factory, 
&manager), + factory, &manager, params_->request_parameters), "failed to create custom load manager"); } diff --git a/src/c++/perf_analyzer/perf_utils.h b/src/c++/perf_analyzer/perf_utils.h index 0871de42c..7166936a9 100644 --- a/src/c++/perf_analyzer/perf_utils.h +++ b/src/c++/perf_analyzer/perf_utils.h @@ -83,15 +83,6 @@ class Range { T step; }; -enum RequestParameterType { STRING = 0, INT = 1, BOOL = 2 }; - -struct RequestParameter { - std::string str_value; - int64_t int_value; - bool bool_value; - RequestParameterType type; -}; - // Converts the datatype from tensorflow to perf analyzer space // \param tf_dtype The data type string returned from the model metadata. // \param datatype Returns the datatype in perf_analyzer space. diff --git a/src/c++/perf_analyzer/periodic_concurrency_manager.h b/src/c++/perf_analyzer/periodic_concurrency_manager.h index dca2797b7..db612fd96 100644 --- a/src/c++/perf_analyzer/periodic_concurrency_manager.h +++ b/src/c++/perf_analyzer/periodic_concurrency_manager.h @@ -42,12 +42,16 @@ class PeriodicConcurrencyManager : public ConcurrencyManager { const bool async, const bool streaming, const int32_t batch_size, const size_t max_threads, const size_t max_concurrency, const SharedMemoryType shared_memory_type, const size_t output_shm_size, + const std::shared_ptr& parser, const std::shared_ptr& factory, - const Range concurrency_range, const uint64_t request_period) + const Range concurrency_range, const uint64_t request_period, + const std::unordered_map& + request_parameters) : ConcurrencyManager( async, streaming, batch_size, max_threads, max_concurrency, - shared_memory_type, output_shm_size, parser, factory), + shared_memory_type, output_shm_size, parser, factory, + request_parameters), concurrency_range_(concurrency_range), request_period_(request_period) { } diff --git a/src/c++/perf_analyzer/request_rate_manager.cc b/src/c++/perf_analyzer/request_rate_manager.cc index 8463c40c4..a79c52ff4 100644 --- 
a/src/c++/perf_analyzer/request_rate_manager.cc +++ b/src/c++/perf_analyzer/request_rate_manager.cc @@ -44,12 +44,14 @@ RequestRateManager::Create( const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, const std::shared_ptr& factory, - std::unique_ptr* manager) + std::unique_ptr* manager, + const std::unordered_map& + request_parameters) { std::unique_ptr local_manager(new RequestRateManager( async, streaming, request_distribution, batch_size, measurement_window_ms, max_trials, max_threads, num_of_sequences, shared_memory_type, - output_shm_size, serial_sequences, parser, factory)); + output_shm_size, serial_sequences, parser, factory, request_parameters)); *manager = std::move(local_manager); @@ -63,10 +65,12 @@ RequestRateManager::RequestRateManager( const uint32_t num_of_sequences, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory) + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters) : LoadManager( async, streaming, batch_size, max_threads, shared_memory_type, - output_shm_size, parser, factory), + output_shm_size, parser, factory, request_parameters), request_distribution_(request_distribution), execute_(false), num_of_sequences_(num_of_sequences), serial_sequences_(serial_sequences) { diff --git a/src/c++/perf_analyzer/request_rate_manager.h b/src/c++/perf_analyzer/request_rate_manager.h index a698f0139..deb8ed953 100644 --- a/src/c++/perf_analyzer/request_rate_manager.h +++ b/src/c++/perf_analyzer/request_rate_manager.h @@ -84,6 +84,7 @@ class RequestRateManager : public LoadManager { /// \param factory The ClientBackendFactory object used to create /// client to the server. /// \param manager Returns a new ConcurrencyManager object. 
+ /// \param request_parameters Custom request parameters to send to the server /// \return cb::Error object indicating success or failure. static cb::Error Create( const bool async, const bool streaming, @@ -93,7 +94,9 @@ class RequestRateManager : public LoadManager { const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, const std::shared_ptr& factory, - std::unique_ptr* manager); + std::unique_ptr* manager, + const std::unordered_map& + request_parameters); /// Adjusts the rate of issuing requests to be the same as 'request_rate' /// \param request_rate The rate at which requests must be issued to the @@ -109,7 +112,9 @@ class RequestRateManager : public LoadManager { const uint32_t num_of_sequences, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const bool serial_sequences, const std::shared_ptr& parser, - const std::shared_ptr& factory); + const std::shared_ptr& factory, + const std::unordered_map& + request_parameters); void InitManagerFinalize() override; diff --git a/src/c++/perf_analyzer/test_command_line_parser.cc b/src/c++/perf_analyzer/test_command_line_parser.cc index bdf819ff8..bb65f4dc5 100644 --- a/src/c++/perf_analyzer/test_command_line_parser.cc +++ b/src/c++/perf_analyzer/test_command_line_parser.cc @@ -195,14 +195,8 @@ CHECK_PARAMS(PAParamsPtr act, PAParamsPtr exp) exp_param != exp->request_parameters.end(), "Unexpected parameter: ", act_param.first); + CHECK(act_param.second.value == exp_param->second.value); CHECK(act_param.second.type == exp_param->second.type); - if (act_param.second.type == RequestParameterType::STRING) { - CHECK(act_param.second.str_value == exp_param->second.str_value); - } else if (act_param.second.type == RequestParameterType::INT) { - CHECK(act_param.second.int_value == exp_param->second.int_value); - } else if (act_param.second.type == RequestParameterType::BOOL) { - CHECK(act_param.second.bool_value == 
exp_param->second.bool_value); - } } } @@ -1392,9 +1386,9 @@ TEST_CASE("Testing Command Line Parser") REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(!parser.UsageCalled()); - RequestParameter param; - param.int_value = 256; - param.type = RequestParameterType::INT; + cb::RequestParameter param; + param.value = "256"; + param.type = "int"; exp->request_parameters["max_tokens"] = param; } @@ -1420,25 +1414,6 @@ TEST_CASE("Testing Command Line Parser") check_params = false; } - - SUBCASE("unsupported type") - { - args.push_back(option_name); - args.push_back("max_tokens:256:hello"); - - int argc = args.size(); - char* argv[argc]; - std::copy(args.begin(), args.end(), argv); - - REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(parser.UsageCalled()); - - expected_msg = - CreateUsageMessage(option_name, "Unsupported type: 'hello'."); - CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); - - check_params = false; - } } SUBCASE("Option : --latency-threshold") diff --git a/src/c++/perf_analyzer/test_concurrency_manager.cc b/src/c++/perf_analyzer/test_concurrency_manager.cc index b454e6272..58d3a3031 100644 --- a/src/c++/perf_analyzer/test_concurrency_manager.cc +++ b/src/c++/perf_analyzer/test_concurrency_manager.cc @@ -54,7 +54,7 @@ class TestConcurrencyManager : public TestLoadManagerBase, params.async, params.streaming, params.batch_size, params.max_threads, params.max_concurrency, params.shared_memory_type, params.output_shm_size, GetParser(), - GetFactory()) + GetFactory(), params.request_parameters) { } @@ -561,8 +561,8 @@ TEST_CASE("Concurrency - shared memory infer input calls") tcm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, tcm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); 
std::shared_ptr thread_stat{std::make_shared()}; std::shared_ptr thread_config{ @@ -635,8 +635,8 @@ TEST_CASE("Concurrency - Shared memory methods") tcm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, tcm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); tcm.InitManager( params.string_length, params.string_data, params.zero_input, @@ -660,8 +660,8 @@ TEST_CASE("Concurrency - Shared memory methods") tcm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, tcm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); tcm.InitManager( params.string_length, params.string_data, params.zero_input, @@ -682,8 +682,8 @@ TEST_CASE("Concurrency - Shared memory methods") tcm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, tcm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, tcm.factory_, mip.mock_data_loader_); tcm.InitManager( params.string_length, params.string_data, params.zero_input, params.user_data, params.start_sequence_id, params.sequence_id_range, diff --git a/src/c++/perf_analyzer/test_custom_load_manager.cc b/src/c++/perf_analyzer/test_custom_load_manager.cc index bf92f329d..0cb6c4c5c 100644 --- a/src/c++/perf_analyzer/test_custom_load_manager.cc +++ b/src/c++/perf_analyzer/test_custom_load_manager.cc @@ -59,7 +59,7 @@ class TestCustomLoadManager : public TestLoadManagerBase, 
params.measurement_window_ms, params.max_trials, params.max_threads, params.num_of_sequences, params.shared_memory_type, params.output_shm_size, params.serial_sequences, GetParser(), - GetFactory()) + GetFactory(), params.request_parameters) { InitManager( params.string_length, params.string_data, params.zero_input, diff --git a/src/c++/perf_analyzer/test_load_manager.cc b/src/c++/perf_analyzer/test_load_manager.cc index 2a11e0749..c057516f0 100644 --- a/src/c++/perf_analyzer/test_load_manager.cc +++ b/src/c++/perf_analyzer/test_load_manager.cc @@ -58,7 +58,8 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager { LoadManager( params.async, params.streaming, params.batch_size, params.max_threads, params.shared_memory_type, - params.output_shm_size, GetParser(), GetFactory()) + params.output_shm_size, GetParser(), GetFactory(), + params.request_parameters) { } diff --git a/src/c++/perf_analyzer/test_request_rate_manager.cc b/src/c++/perf_analyzer/test_request_rate_manager.cc index 3e29a8e63..008424b72 100644 --- a/src/c++/perf_analyzer/test_request_rate_manager.cc +++ b/src/c++/perf_analyzer/test_request_rate_manager.cc @@ -61,7 +61,8 @@ class TestRequestRateManager : public TestLoadManagerBase, params.batch_size, params.measurement_window_ms, params.max_trials, params.max_threads, params.num_of_sequences, params.shared_memory_type, params.output_shm_size, - params.serial_sequences, GetParser(), GetFactory()) + params.serial_sequences, GetParser(), GetFactory(), + params.request_parameters) { } @@ -368,7 +369,8 @@ class TestRequestRateManager : public TestLoadManagerBase, infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params_.max_threads, params_.batch_size, params_.shared_memory_type, - params_.output_shm_size, mmp, factory_, mdl); + params_.output_shm_size, params_.request_parameters, mmp, factory_, + mdl); parser_ = mmp; data_loader_ = mdl; @@ -1549,8 +1551,8 @@ TEST_CASE("Request rate - Shared memory methods") 
trrm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, trrm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); trrm.parser_ = mip.mock_model_parser_; trrm.data_loader_ = mip.mock_data_loader_; @@ -1576,8 +1578,8 @@ TEST_CASE("Request rate - Shared memory methods") trrm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, trrm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); trrm.parser_ = mip.mock_model_parser_; trrm.data_loader_ = mip.mock_data_loader_; @@ -1601,8 +1603,8 @@ TEST_CASE("Request rate - Shared memory methods") trrm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, trrm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); trrm.parser_ = mip.mock_model_parser_; trrm.data_loader_ = mip.mock_data_loader_; @@ -1692,8 +1694,8 @@ TEST_CASE("Request rate - Shared memory infer input calls") trrm.infer_data_manager_ = MockInferDataManagerFactory::CreateMockInferDataManager( params.max_threads, params.batch_size, params.shared_memory_type, - params.output_shm_size, mip.mock_model_parser_, trrm.factory_, - mip.mock_data_loader_); + params.output_shm_size, params.request_parameters, + mip.mock_model_parser_, trrm.factory_, mip.mock_data_loader_); std::shared_ptr thread_stat{std::make_shared()}; std::shared_ptr thread_config{