diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt index bd78abf0d..dd149bb79 100644 --- a/ngraph_bridge/CMakeLists.txt +++ b/ngraph_bridge/CMakeLists.txt @@ -49,6 +49,9 @@ set(SRC tf_graphcycles.cc tf_deadness_analysis.cc version.cc + ie_backend_engine.cc + ie_basic_engine.cc + ie_vadm_engine.cc ) message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}") diff --git a/ngraph_bridge/executable.cc b/ngraph_bridge/executable.cc index e9c9da63d..dce744ab9 100644 --- a/ngraph_bridge/executable.cc +++ b/ngraph_bridge/executable.cc @@ -14,15 +14,18 @@ // limitations under the License. //***************************************************************************** +#include + #include "ngraph/ngraph.hpp" #include "ngraph/opsets/opset.hpp" -#include - #include "logging/ngraph_log.h" #include "ngraph_bridge/default_opset.h" #include "ngraph_bridge/executable.h" +#include "ngraph_bridge/ie_basic_engine.h" #include "ngraph_bridge/ie_tensor.h" +#include "ngraph_bridge/ie_utils.h" +#include "ngraph_bridge/ie_vadm_engine.h" #include "ngraph_bridge/ngraph_utils.h" using namespace std; @@ -138,22 +141,25 @@ Executable::Executable(shared_ptr func, string device) "ie_" + m_device + "_" + name; } - NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device; - - // Load network to the plugin (m_device) and create an infer request - InferenceEngine::ExecutableNetwork exe_network = - ie.LoadNetwork(m_network, m_device, options); - m_infer_req = exe_network.CreateInferRequest(); + NGRAPH_VLOG(2) << "Creating IE Execution Engine"; + if (m_device == "HDDL") { + m_ie_engine = make_shared(m_network); + } else { + m_ie_engine = make_shared(m_network, m_device); + } } bool Executable::call(const vector>& inputs, - vector>& outputs) { + vector>& outputs, + bool multi_req_execution) { if (m_trivial_fn) { NGRAPH_VLOG(2) << "Calling trivial IE function with inputs=" << inputs.size() << " outputs=" << outputs.size(); return call_trivial(inputs, outputs); } + shared_ptr func = m_ie_engine->get_func(); + // Check if the number of inputs that the CNN network expects is equal to the // sum of the // inputs specified and the inputs we hoisted, if any. 
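[Usage note, not part of the patch] A minimal sketch of how the reworked Executable is expected to be driven after this change. The tensor element type (ngraph::runtime::Tensor) and the "CPU" device string are assumptions taken from the surrounding code; only the new third argument to call() is introduced by this patch.

// Sketch only: `func` is an already-built std::shared_ptr<ngraph::Function>.
// The constructor now selects IE_VADM_Engine for "HDDL" and IE_Basic_Engine
// for every other device string.
auto exe = std::make_shared<tensorflow::ngraph_bridge::Executable>(func, "CPU");

std::vector<std::shared_ptr<ngraph::runtime::Tensor>> inputs;   // populated by the caller
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> outputs(
    exe->get_results().size());  // nullptr entries are allocated by the engine

// The new flag defaults to false, so existing callers keep single-request
// behaviour; passing true asks the engine to split the batch across several
// infer requests (only the HDDL/VADM engine acts on it).
exe->call(inputs, outputs, /*multi_req_execution=*/true);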
@@ -166,7 +172,8 @@ bool Executable::call(const vector>& inputs, } // Prepare input blobs - auto func = m_network.getFunction(); + std::vector> ie_inputs(inputs.size()); + std::vector input_names(inputs.size()); auto parameters = func->get_parameters(); int j = 0; for (int i = 0; i < inputs.size(); i++) { @@ -179,18 +186,23 @@ bool Executable::call(const vector>& inputs, NGRAPH_VLOG(1) << "Skipping unused input " << input_name; continue; } - shared_ptr tv = static_pointer_cast(inputs[i]); - m_infer_req.SetBlob(input_name, tv->get_blob()); + ie_inputs[i] = nullptr; + ie_inputs[i] = static_pointer_cast(inputs[i]); + input_names[i] = input_name; } + std::vector> ie_hoisted_params( + m_hoisted_params.size()); + std::vector param_names(m_hoisted_params.size()); for (const auto& it : m_hoisted_params) { auto input_name = it.first; if (input_info.find(input_name) == input_info.end()) { NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name; continue; } - shared_ptr tv = static_pointer_cast(it.second); - m_infer_req.SetBlob(input_name, tv->get_blob()); + ie_hoisted_params[j] = nullptr; + ie_hoisted_params[j] = static_pointer_cast(it.second); + param_names[j++] = input_name; } InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo(); @@ -213,22 +225,26 @@ bool Executable::call(const vector>& inputs, // Prepare output blobs auto results = func->get_results(); + std::vector> ie_outputs(outputs.size()); + std::vector output_names(outputs.size()); for (int i = 0; i < results.size(); i++) { if (outputs[i] != nullptr) { - NGRAPH_VLOG(4) << "Executable::call() SetBlob()"; - shared_ptr tv = static_pointer_cast(outputs[i]); - m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob()); + ie_outputs[i] = static_pointer_cast(outputs[i]); } + output_names[i] = get_output_name(results[i]); + } + + if (multi_req_execution) { + m_ie_engine->enable_multi_req_execution(); } - m_infer_req.Infer(); + m_ie_engine->infer(ie_inputs, input_names, ie_outputs, output_names, + ie_hoisted_params, param_names); // Set dynamic output blobs for (int i = 0; i < results.size(); i++) { if (outputs[i] == nullptr) { - NGRAPH_VLOG(4) << "Executable::call() GetBlob()"; - auto blob = m_infer_req.GetBlob(get_output_name(results[i])); - outputs[i] = make_shared(blob); + outputs[i] = ie_outputs[i]; } } diff --git a/ngraph_bridge/executable.h b/ngraph_bridge/executable.h index 124233a5b..962a90c3f 100644 --- a/ngraph_bridge/executable.h +++ b/ngraph_bridge/executable.h @@ -21,7 +21,9 @@ #include #include + #include "ngraph/ngraph.hpp" +#include "ngraph_bridge/ie_backend_engine.h" using namespace std; @@ -35,12 +37,21 @@ class Executable { Executable(shared_ptr func, string device); ~Executable() {} bool call(const vector>& inputs, - vector>& outputs); + vector>& outputs, + bool multi_req_execution = false); const ngraph::ResultVector& get_results() { return m_function->get_results(); }; + const vector get_output_shape(const int i) { + if (m_trivial_fn) { + return get_results()[i]->get_shape(); + } else { + return m_ie_engine->get_output_shape(i); + } + } + private: bool call_trivial(const vector>& inputs, vector>& outputs); @@ -56,6 +67,7 @@ class Executable { shared_ptr m_trivial_fn; // This is the original nGraph function corresponding to this executable shared_ptr m_function; + shared_ptr m_ie_engine; }; } } diff --git a/ngraph_bridge/ie_backend_engine.cc b/ngraph_bridge/ie_backend_engine.cc new file mode 100644 index 000000000..b9325d750 --- /dev/null +++ b/ngraph_bridge/ie_backend_engine.cc @@ -0,0 
+1,104 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include + +#include "ngraph_bridge/ie_backend_engine.h" +#include "ngraph_bridge/ie_utils.h" + +namespace tensorflow { +namespace ngraph_bridge { + +IE_Backend_Engine::IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network, + std::string device) + : m_network(ie_network), + m_func(ie_network.getFunction()), + m_device(device), + m_multi_req_execution(false), + m_network_ready(false) { + if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) { + auto& name = m_network.getName(); + m_network.serialize(name + ".xml", name + ".bin"); + } +} + +IE_Backend_Engine::~IE_Backend_Engine() {} + +void IE_Backend_Engine::load_network() { + if (m_network_ready) return; + + std::map config; + + if (m_device == "MYRIAD") { + // Set MYRIAD configurations + if (IE_Utils::VPUConfigEnabled()) { + config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO"; + } + + if (IE_Utils::VPUFastCompileEnabled()) { + config["MYRIAD_HW_INJECT_STAGES"] = "NO"; + config["MYRIAD_COPY_OPTIMIZATION"] = "NO"; + } + } + + InferenceEngine::Core ie; + // Load network to the plugin (m_device) + m_exe_network = ie.LoadNetwork(m_network, m_device, config); + m_network_ready = true; +} + +void IE_Backend_Engine::start_async_inference(const int req_id) { + // Start Async inference + try { + m_infer_reqs[req_id].StartAsync(); + } catch (InferenceEngine::details::InferenceEngineException e) { + THROW_IE_EXCEPTION << "Couldn't start Inference: "; + } catch (...) { + THROW_IE_EXCEPTION << "Couldn't start Inference: "; + } +} + +void IE_Backend_Engine::complete_async_inference(const int req_id) { + // Wait for Async inference completion + try { + m_infer_reqs[req_id].Wait( + InferenceEngine::IInferRequest::WaitMode::RESULT_READY); + } catch (InferenceEngine::details::InferenceEngineException e) { + THROW_IE_EXCEPTION << " Exception with completing Inference: "; + } catch (...) 
{
+    THROW_IE_EXCEPTION << " Exception with completing Inference: ";
+  }
+}
+
+size_t IE_Backend_Engine::get_output_batch_size(size_t inputBatchSize) const {
+  return m_network.getBatchSize() *
+         IE_Utils::GetNumRequests(inputBatchSize, m_device);
+}
+
+// Enables multi request execution if the execution engine supports it
+void IE_Backend_Engine::enable_multi_req_execution() {
+  m_multi_req_execution = true;
+}
+// Disables multi request execution
+void IE_Backend_Engine::disable_multi_req_execution() {
+  m_multi_req_execution = false;
+}
+
+std::shared_ptr<ngraph::Function> IE_Backend_Engine::get_func() {
+  return m_func;
+}
+}
+}
diff --git a/ngraph_bridge/ie_backend_engine.h b/ngraph_bridge/ie_backend_engine.h
new file mode 100644
index 000000000..d8724389a
--- /dev/null
+++ b/ngraph_bridge/ie_backend_engine.h
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#ifndef IE_BACKEND_ENGINE_H_
+#define IE_BACKEND_ENGINE_H_
+
+#include
+#include
+#include
+
+#include
+
+#include "ngraph_bridge/ie_tensor.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IE_Backend_Engine {
+ public:
+  IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IE_Backend_Engine();
+
+  // Executes the inference
+  virtual void infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names) = 0;
+
+  // Returns output batch size based on the input batch size and the device
+  // FIXME: This may not be needed
+  virtual size_t get_output_batch_size(size_t inputBatchSize) const;
+
+  // Enables multi request execution if the execution engine supports it
+  void enable_multi_req_execution();
+  // Disables multi request execution
+  void disable_multi_req_execution();
+
+  // Returns the NGraph Function from the CNNNetwork
+  std::shared_ptr<ngraph::Function> get_func();
+
+  virtual const std::vector<size_t> get_output_shape(const int i) = 0;
+
+ protected:
+  InferenceEngine::CNNNetwork m_network;
+  std::shared_ptr<ngraph::Function> m_func;
+  std::vector<InferenceEngine::InferRequest> m_infer_reqs;
+  std::string m_device;
+  bool m_multi_req_execution;
+  InferenceEngine::ExecutableNetwork m_exe_network;
+  bool m_network_ready;
+
+  virtual void start_async_inference(const int req_id);
+  virtual void complete_async_inference(const int req_id);
+  virtual void load_network();
+};
+}
+}
+
+#endif  // IE_BACKEND_ENGINE_H_
diff --git a/ngraph_bridge/ie_basic_engine.cc b/ngraph_bridge/ie_basic_engine.cc
new file mode 100644
index 000000000..f5db5d114
--- /dev/null
+++ b/ngraph_bridge/ie_basic_engine.cc
@@ -0,0 +1,80 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include + +#include "logging/ngraph_log.h" +#include "ngraph_bridge/ie_basic_engine.h" +#include "ngraph_bridge/ie_utils.h" + +namespace tensorflow { +namespace ngraph_bridge { + +IE_Basic_Engine::IE_Basic_Engine(InferenceEngine::CNNNetwork ie_network, + std::string device) + : IE_Backend_Engine(ie_network, device) {} + +IE_Basic_Engine::~IE_Basic_Engine() {} + +void IE_Basic_Engine::infer( + std::vector>& inputs, + std::vector& input_names, + std::vector>& outputs, + std::vector& output_names, + std::vector>& hoisted_params, + std::vector& param_names) { + load_network(); + if (m_infer_reqs.empty()) { + m_infer_reqs.push_back(m_exe_network.CreateInferRequest()); + } + + // Prepare input blobs + auto func = m_network.getFunction(); + auto parameters = func->get_parameters(); + for (int i = 0; i < inputs.size(); i++) { + if (inputs[i] != nullptr) + m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob()); + } + + for (int i = 0; i < hoisted_params.size(); i++) { + if (hoisted_params[i] != nullptr) + m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob()); + } + + // Prepare output blobs + auto results = func->get_results(); + for (int i = 0; i < results.size(); i++) { + if (outputs[i] != nullptr) { + NGRAPH_VLOG(4) << "Executable::call() SetBlob()"; + m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob()); + } + } + + m_infer_reqs[0].Infer(); + + // Set dynamic output blobs + for (int i = 0; i < results.size(); i++) { + if (outputs[i] == nullptr) { + NGRAPH_VLOG(4) << "Executable::call() GetBlob()"; + auto blob = m_infer_reqs[0].GetBlob(output_names[i]); + outputs[i] = std::make_shared(blob); + } + } + + // return true; +} +} +} diff --git a/ngraph_bridge/ie_basic_engine.h b/ngraph_bridge/ie_basic_engine.h new file mode 100644 index 000000000..d6dc295e6 --- /dev/null +++ b/ngraph_bridge/ie_basic_engine.h @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/
+
+#ifndef IE_BASIC_ENGINE_H_
+#define IE_BASIC_ENGINE_H_
+
+#include
+#include
+#include
+
+#include
+
+#include "ngraph_bridge/ie_backend_engine.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IE_Basic_Engine : public IE_Backend_Engine {
+ public:
+  IE_Basic_Engine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IE_Basic_Engine();
+
+  // Executes the inference
+  virtual void infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names);
+
+  virtual const std::vector<size_t> get_output_shape(const int i) {
+    return m_func->get_results()[i]->get_shape();
+  };
+};
+}
+}
+
+#endif  // IE_BASIC_ENGINE_H_
diff --git a/ngraph_bridge/ie_utils.h b/ngraph_bridge/ie_utils.h
new file mode 100644
index 000000000..4ca82aeae
--- /dev/null
+++ b/ngraph_bridge/ie_utils.h
@@ -0,0 +1,109 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+// Utility helpers shared by the IE backend engines: request-count and
+// per-request batch-size computation, VPU configuration switches, and
+// MemoryBlob creation.
+
+#ifndef IE_UTILS_H_
+#define IE_UTILS_H_
+
+#include
+#include
+#include
+#include
+
+#include
+
+class IE_Utils {
+ public:
+  // Returns the maximum number of requests based on the device.
+  // TODO: The number of requests is hardcoded temporarily.
+  // This should dynamically look at the underlying architecture
+  // and compute the best performing number of requests.
+  static size_t GetMaxReq(std::string device) {
+    int max_req = 1;
+    if (device == "HDDL") max_req = 8;
+    return max_req;
+  }
+
+  // Computes the input batch size per request based on the actual input
+  // batch size and the device.
+ static size_t GetInputBatchSize(size_t inputBatchSize, std::string device) { + int max_req = IE_Utils::GetMaxReq(device); + return ((inputBatchSize + max_req - 1) / max_req); + } + + // Gets the actual number of requests + static size_t GetNumRequests(size_t inputBatchSize, std::string device) { + return inputBatchSize / GetInputBatchSize(inputBatchSize, device); + } + + static bool VPUConfigEnabled() { return true; } + + static bool VPUFastCompileEnabled() { return true; } + + // Creates a MemoryBlob for InferenceEngine + static void CreateBlob(InferenceEngine::TensorDesc& desc, + InferenceEngine::Precision& precision, + const void* data_ptr, size_t byte_size, + InferenceEngine::MemoryBlob::Ptr& blob_ptr) { +#define MAKE_IE_BLOB(type_, desc_, ptr_, size_) \ + do { \ + if (ptr_ == nullptr) { \ + blob_ptr = std::make_shared>(desc); \ + blob_ptr->allocate(); \ + } else { \ + blob_ptr = std::make_shared>( \ + desc, (type_*)ptr_, size_); \ + } \ + } while (0) + switch (precision) { + case InferenceEngine::Precision::FP32: + MAKE_IE_BLOB(float, desc, (float*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::U8: + MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I8: + MAKE_IE_BLOB(int8_t, desc, (int8_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::U16: + MAKE_IE_BLOB(uint16_t, desc, (uint16_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I16: + MAKE_IE_BLOB(int16_t, desc, (int16_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I32: + MAKE_IE_BLOB(int32_t, desc, (int32_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::U64: + MAKE_IE_BLOB(uint64_t, desc, (uint64_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I64: + MAKE_IE_BLOB(int64_t, desc, (int64_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::BOOL: + MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size); + break; + default: + THROW_IE_EXCEPTION << "Can't create IE blob for type " + << precision.name(); + } + } +}; + +#endif +// IE_UTILS_H diff --git a/ngraph_bridge/ie_vadm_engine.cc b/ngraph_bridge/ie_vadm_engine.cc new file mode 100644 index 000000000..b2b35371e --- /dev/null +++ b/ngraph_bridge/ie_vadm_engine.cc @@ -0,0 +1,165 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#include + +#include "ngraph_bridge/ie_utils.h" +#include "ngraph_bridge/ie_vadm_engine.h" + +namespace tensorflow { +namespace ngraph_bridge { + +IE_VADM_Engine::IE_VADM_Engine(InferenceEngine::CNNNetwork ie_network) + : IE_Backend_Engine(ie_network, "HDDL"), + m_orig_batch_size(ie_network.getBatchSize()) {} + +IE_VADM_Engine::~IE_VADM_Engine() {} + +void IE_VADM_Engine::infer( + std::vector>& inputs, + std::vector& input_names, + std::vector>& outputs, + std::vector& output_names, + std::vector>& hoisted_params, + std::vector& param_names) { + // Batch size is 0 and the number of requests is 1 when + // multi request execution is disabled. + int num_req = 1; + int batch_size = 0; + + if (m_multi_req_execution && inputs.size() == 1 && inputs[0] != nullptr && + inputs[0]->get_blob()->getTensorDesc().getDims().size() > 1) { + // Set the batch size per request and number of requests + batch_size = IE_Utils::GetInputBatchSize( + inputs[0]->get_blob()->getTensorDesc().getDims()[0], m_device); + num_req = inputs[0]->get_blob()->getTensorDesc().getDims()[0] / batch_size; + if (m_network.getBatchSize() != batch_size) + m_network.setBatchSize(batch_size); + } + + // Create requests + load_network(); + while (m_infer_reqs.size() < num_req) { + m_infer_reqs.push_back(m_exe_network.CreateInferRequest()); + } + std::vector in_blobs(inputs.size() * + num_req); + std::vector param_blobs( + hoisted_params.size()); + std::vector out_blobs(outputs.size() * + num_req); + // Prepare input blobs + for (int i = 0; i < inputs.size(); i++) { + if (inputs[i] == nullptr) continue; + InferenceEngine::TensorDesc desc = inputs[i]->get_blob()->getTensorDesc(); + InferenceEngine::Precision prec = desc.getPrecision(); + const void* input_data_pointer = inputs[i]->get_data_ptr(); + std::string input_name = input_names[i]; + size_t size = inputs[i]->get_blob()->byteSize(); + + InferenceEngine::SizeVector req_shape(desc.getDims()); + if (batch_size != 0) { + req_shape[0] = batch_size; + desc.setDims(req_shape); + } + for (int j = 0; j < num_req; j++) { + size_t req_size = size / num_req; + const void* data_ptr = + (void*)((uint64_t)(input_data_pointer) + req_size * j); + int in_idx = i * num_req + j; + IE_Utils::CreateBlob(desc, prec, data_ptr, req_size, in_blobs[in_idx]); + m_infer_reqs[j].SetBlob(input_name, in_blobs[in_idx]); + } + } + for (int i = 0; i < hoisted_params.size(); i++) { + if (hoisted_params[i] == nullptr) continue; + InferenceEngine::TensorDesc desc = + hoisted_params[i]->get_blob()->getTensorDesc(); + InferenceEngine::Precision prec = desc.getPrecision(); + const void* param_data_pointer = hoisted_params[i]->get_data_ptr(); + std::string param_name = param_names[i]; + size_t size = hoisted_params[i]->get_blob()->byteSize(); + + InferenceEngine::SizeVector req_shape(desc.getDims()); + IE_Utils::CreateBlob(desc, prec, param_data_pointer, size, param_blobs[i]); + for (int j = 0; j < num_req; j++) { + m_infer_reqs[j].SetBlob(param_name, param_blobs[i]); + } + } + + // Prepare output blobs + for (int i = 0; i < outputs.size(); i++) { + out_blobs[i] = nullptr; + if (outputs[i] != nullptr) { + InferenceEngine::TensorDesc desc = + outputs[i]->get_blob()->getTensorDesc(); + InferenceEngine::Precision prec = desc.getPrecision(); + InferenceEngine::Layout layout = desc.getLayout(); + const void* output_data_pointer = outputs[i]->get_data_ptr(); + std::string output_name = output_names[i]; + size_t size = 
outputs[i]->get_blob()->byteSize(); + + InferenceEngine::SizeVector req_shape(desc.getDims()); + if (batch_size != 0) { + req_shape[0] = batch_size; + desc.setDims(req_shape); + } + + InferenceEngine::TensorDesc req_desc(prec, req_shape, layout); + for (int j = 0; j < num_req; j++) { + size_t req_size = size / num_req; + const void* data_ptr = + (void*)((uint64_t)(output_data_pointer) + req_size * j); + int out_idx = i * num_req + j; + IE_Utils::CreateBlob(req_desc, prec, data_ptr, req_size, + out_blobs[out_idx]); + m_infer_reqs[j].SetBlob(output_name, out_blobs[out_idx]); + } + } + } + + // Start Inference Requests + for (int i = 0; i < num_req; i++) { + start_async_inference(i); + } + // Complete Inference Requests + for (int i = 0; i < num_req; i++) { + complete_async_inference(i); + } + + // Set dynamic output blobs + for (int i = 0; i < outputs.size(); i++) { + if (outputs[i] == nullptr) { + auto blob = InferenceEngine::as( + m_infer_reqs[0].GetBlob(output_names[i])); + outputs[i] = std::make_shared(blob); + } + } + + for (int i = 0; i < in_blobs.size(); i++) { + in_blobs[i]->deallocate(); + } + for (int i = 0; i < out_blobs.size(); i++) { + if (out_blobs[i] != nullptr) { + out_blobs[i]->deallocate(); + } + } + for (int i = 0; i < param_blobs.size(); i++) { + param_blobs[i]->deallocate(); + } +} +} +} diff --git a/ngraph_bridge/ie_vadm_engine.h b/ngraph_bridge/ie_vadm_engine.h new file mode 100644 index 000000000..b3851238d --- /dev/null +++ b/ngraph_bridge/ie_vadm_engine.h @@ -0,0 +1,58 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#ifndef IE_VADM_ENGINE_H_ +#define IE_VADM_ENGINE_H_ + +#include +#include +#include + +#include + +#include "ngraph_bridge/ie_backend_engine.h" + +namespace tensorflow { +namespace ngraph_bridge { + +class IE_VADM_Engine : public IE_Backend_Engine { + public: + IE_VADM_Engine(InferenceEngine::CNNNetwork ie_network); + ~IE_VADM_Engine(); + + // Executes the inference + virtual void infer(std::vector>& inputs, + std::vector& input_names, + std::vector>& outputs, + std::vector& output_names, + std::vector>& hoisted_params, + std::vector& param_names); + + virtual const std::vector get_output_shape(const int i) { + std::vector shape = m_func->get_results()[i]->get_shape(); + if (m_multi_req_execution && shape.size() > 1) { + shape[0] = m_orig_batch_size; + } + return shape; + }; + + private: + int m_orig_batch_size; +}; +} +} + +#endif // IE_VADM_ENGINE_H_ diff --git a/ngraph_bridge/kernels/ngraph_encapsulate_op.cc b/ngraph_bridge/kernels/ngraph_encapsulate_op.cc index 74f89580f..f05e23feb 100644 --- a/ngraph_bridge/kernels/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/kernels/ngraph_encapsulate_op.cc @@ -207,6 +207,13 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { int time_func_create_or_lookup; Timer function_lookup_or_create; + bool multi_req_execution = false; + if (std::getenv("NGRAPH_TF_ENABLE_BATCHING") && + NGraphClusterManager::NumberOfClusters() == 1) { + NGRAPH_VLOG(2) << "Batching is enabled" << name(); + multi_req_execution = true; + } + // TF input tensor std::vector tf_input_tensors; std::shared_ptr ng_exec; @@ -278,7 +285,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { } // Create the TF output tensor - auto ng_shape = ng_element->get_shape(); + auto ng_shape = ng_exec->get_output_shape(i); TensorShape tf_shape; for (auto dim : ng_shape) { tf_shape.AddDim(dim); @@ -315,7 +322,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { << "NGraphEncapsulateOp::Compute call starting for cluster " << m_cluster_id; try { - ng_exec->call(ng_inputs, ng_outputs); + ng_exec->call(ng_inputs, ng_outputs, multi_req_execution); } catch (const std::exception& exp) { string status_string = "Caught exception while executing cluster " + to_string(m_cluster_id) + ": " + diff --git a/ngraph_bridge/ngraph_cluster_manager.cc b/ngraph_bridge/ngraph_cluster_manager.cc index 4555c3e6d..cb6aecc20 100644 --- a/ngraph_bridge/ngraph_cluster_manager.cc +++ b/ngraph_bridge/ngraph_cluster_manager.cc @@ -37,6 +37,10 @@ GraphDef* NGraphClusterManager::GetClusterGraph(size_t idx) { return idx < s_cluster_graphs.size() ? s_cluster_graphs[idx] : nullptr; } +size_t NGraphClusterManager::NumberOfClusters() { + return s_cluster_graphs.size(); +} + void NGraphClusterManager::EvictAllClusters() { s_cluster_graphs.clear(); } } // namespace ngraph_bridge diff --git a/ngraph_bridge/ngraph_cluster_manager.h b/ngraph_bridge/ngraph_cluster_manager.h index bcb41fa1b..9008521e5 100644 --- a/ngraph_bridge/ngraph_cluster_manager.h +++ b/ngraph_bridge/ngraph_cluster_manager.h @@ -29,6 +29,7 @@ class NGraphClusterManager { static size_t NewCluster(); static tensorflow::GraphDef* GetClusterGraph(size_t idx); static void EvictAllClusters(); + static size_t NumberOfClusters(); private: static std::vector s_cluster_graphs;
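[Reviewer note, not part of the patch] The batching path is enabled end to end only when the NGRAPH_TF_ENABLE_BATCHING environment variable is set and exactly one cluster exists; NGraphEncapsulateOp then passes multi_req_execution=true into Executable::call(), and IE_VADM_Engine splits the input batch across infer requests using the helpers in ie_utils.h. A self-contained sketch of that arithmetic (the helper bodies mirror the patch; the main() harness and the example batch sizes are illustrative only):

#include <cstddef>
#include <iostream>
#include <string>

// Mirror of IE_Utils::GetMaxReq / GetInputBatchSize / GetNumRequests above.
static size_t GetMaxReq(const std::string& device) {
  return device == "HDDL" ? 8 : 1;
}
static size_t GetInputBatchSize(size_t batch, const std::string& device) {
  size_t max_req = GetMaxReq(device);
  return (batch + max_req - 1) / max_req;  // ceiling division
}
static size_t GetNumRequests(size_t batch, const std::string& device) {
  return batch / GetInputBatchSize(batch, device);
}

int main() {
  // Batch 16 on HDDL: 8 requests of 2 samples each.
  std::cout << GetNumRequests(16, "HDDL") << " x "
            << GetInputBatchSize(16, "HDDL") << std::endl;
  // Batch 20 on HDDL: 6 requests of 3 samples cover only 18 of the 20
  // samples; how non-divisible batches should behave may be worth
  // confirming during review.
  std::cout << GetNumRequests(20, "HDDL") << " x "
            << GetInputBatchSize(20, "HDDL") << std::endl;
  return 0;
}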