Cavusmustafa/modular backend review v6 #762

Status: Open. Wants to merge 3 commits into master (base branch). Showing changes from 2 commits.
3 changes: 3 additions & 0 deletions ngraph_bridge/CMakeLists.txt
@@ -49,6 +49,9 @@ set(SRC
tf_graphcycles.cc
tf_deadness_analysis.cc
version.cc
ie_backend_engine.cc
ie_basic_engine.cc
ie_vadm_engine.cc
)

message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}")
57 changes: 38 additions & 19 deletions ngraph_bridge/executable.cc
@@ -22,7 +22,10 @@
#include "logging/ngraph_log.h"
#include "ngraph_bridge/default_opset.h"
#include "ngraph_bridge/executable.h"
#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_tensor.h"
#include "ngraph_bridge/ie_utils.h"
#include "ngraph_bridge/ie_vadm_engine.h"
#include "ngraph_bridge/ngraph_utils.h"

using namespace std;
@@ -138,22 +141,25 @@ Executable::Executable(shared_ptr<Function> func, string device)
"ie_" + m_device + "_" + name;
}

NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device;

// Load network to the plugin (m_device) and create an infer request
InferenceEngine::ExecutableNetwork exe_network =
ie.LoadNetwork(m_network, m_device, options);
m_infer_req = exe_network.CreateInferRequest();
NGRAPH_VLOG(2) << "Creating IE Execution Engine";
if (m_device == "HDDL") {
m_ie_engine = make_shared<IE_VADM_Engine>(m_network);
} else {
m_ie_engine = make_shared<IE_Basic_Engine>(m_network, m_device);
}
}

bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,
vector<shared_ptr<runtime::Tensor>>& outputs) {
vector<shared_ptr<runtime::Tensor>>& outputs,
bool multi_req_execution) {
if (m_trivial_fn) {
NGRAPH_VLOG(2) << "Calling trivial IE function with inputs="
<< inputs.size() << " outputs=" << outputs.size();
return call_trivial(inputs, outputs);
}

shared_ptr<ngraph::Function> func = m_ie_engine->get_func();

// Check if the number of inputs that the CNN network expects is equal to the
// sum of the
// inputs specified and the inputs we hoisted, if any.
@@ -166,7 +172,8 @@ bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,
}

// Prepare input blobs
auto func = m_network.getFunction();
std::vector<std::shared_ptr<IETensor>> ie_inputs(inputs.size());
std::vector<std::string> input_names(inputs.size());
auto parameters = func->get_parameters();
int j = 0;
for (int i = 0; i < inputs.size(); i++) {
@@ -179,18 +186,23 @@ bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,
NGRAPH_VLOG(1) << "Skipping unused input " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(inputs[i]);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_inputs[i] = nullptr;
ie_inputs[i] = static_pointer_cast<IETensor>(inputs[i]);
input_names[i] = input_name;
}

std::vector<std::shared_ptr<IETensor>> ie_hoisted_params(
m_hoisted_params.size());
std::vector<std::string> param_names(m_hoisted_params.size());
for (const auto& it : m_hoisted_params) {
auto input_name = it.first;
if (input_info.find(input_name) == input_info.end()) {
NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(it.second);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_hoisted_params[j] = nullptr;
ie_hoisted_params[j] = static_pointer_cast<IETensor>(it.second);
param_names[j++] = input_name;
}

InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo();
@@ -213,22 +225,29 @@ bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,

// Prepare output blobs
auto results = func->get_results();
std::vector<std::shared_ptr<IETensor>> ie_outputs(outputs.size());
std::vector<std::string> output_names(outputs.size());
for (int i = 0; i < results.size(); i++) {
if (outputs[i] != nullptr) {
NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(outputs[i]);
m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob());
ie_outputs[i] = static_pointer_cast<IETensor>(outputs[i]);
}
output_names[i] = get_output_name(results[i]);
}

if (multi_req_execution) {
m_ie_engine->enable_multi_req_execution();
} else {
m_ie_engine->disable_multi_req_execution();
Contributor: Is this needed, isn't it disabled by default?

Author: It's added in case batching might be disabled during consecutive executions. But we don't have advanced checks for now and there is no scenario to disable batching if it's already enabled. I updated it accordingly.
}

m_infer_req.Infer();
m_ie_engine->infer(ie_inputs, input_names, ie_outputs, output_names,
ie_hoisted_params, param_names);

// Set dynamic output blobs
for (int i = 0; i < results.size(); i++) {
if (outputs[i] == nullptr) {
NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
auto blob = m_infer_req.GetBlob(get_output_name(results[i]));
outputs[i] = make_shared<IETensor>(blob);
// NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
outputs[i] = ie_outputs[i];
}
}

13 changes: 12 additions & 1 deletion ngraph_bridge/executable.h
@@ -22,6 +22,7 @@

#include <ie_core.hpp>
#include "ngraph/ngraph.hpp"
#include "ngraph_bridge/ie_backend_engine.h"

using namespace std;

@@ -35,12 +36,21 @@ class Executable {
Executable(shared_ptr<ngraph::Function> func, string device);
~Executable() {}
bool call(const vector<shared_ptr<ngraph::runtime::Tensor>>& inputs,
vector<shared_ptr<ngraph::runtime::Tensor>>& outputs);
vector<shared_ptr<ngraph::runtime::Tensor>>& outputs,
bool multi_req_execution = false);

const ngraph::ResultVector& get_results() {
return m_function->get_results();
};

const vector<size_t> get_output_shape(const int i) {
if (m_trivial_fn) {
return get_results()[i]->get_shape();
} else {
return m_ie_engine->get_output_shape(i);
}
}

private:
bool call_trivial(const vector<shared_ptr<ngraph::runtime::Tensor>>& inputs,
vector<shared_ptr<ngraph::runtime::Tensor>>& outputs);
@@ -56,6 +66,7 @@ class Executable {
shared_ptr<ngraph::Function> m_trivial_fn;
// This is the original nGraph function corresponding to this executable
shared_ptr<ngraph::Function> m_function;
shared_ptr<IE_Backend_Engine> m_ie_engine;
};
}
}
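With the defaulted multi_req_execution argument and the new get_output_shape() accessor above, a caller-side sketch could look like the following. This is illustrative only: the run_twice wrapper and its arguments are hypothetical, not code from this PR.

#include <memory>
#include <vector>
#include "ngraph_bridge/executable.h"

using tensorflow::ngraph_bridge::Executable;

// Sketch: run the same executable twice, enabling multi-request execution
// (batch splitting across infer requests) only on the second call. Per the
// review thread above, it is off by default and not expected to be turned
// off again once enabled.
void run_twice(Executable& exe,
               const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs,
               std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs) {
  exe.call(inputs, outputs);                                // default: single request
  exe.call(inputs, outputs, /*multi_req_execution=*/true);  // split batch across requests
  const std::vector<size_t> shape0 = exe.get_output_shape(0);  // no direct CNNNetwork access needed
  (void)shape0;
}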
103 changes: 103 additions & 0 deletions ngraph_bridge/ie_backend_engine.cc
@@ -0,0 +1,103 @@
/*******************************************************************************
* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include "ngraph_bridge/ie_backend_engine.h"
#include <iostream>
#include "ngraph_bridge/ie_utils.h"
Contributor (@kanvi-nervana, Dec 18, 2020): General guideline followed in the bridge:
standard includes
TF
OV
nGraph
bridge
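For illustration, this file's includes regrouped along that guideline (standard includes first, then TF, OV, nGraph, and finally bridge headers; this particular file needs no TF, OV, or nGraph headers directly) would read:

#include <iostream>                           // standard
// (TF, OV and nGraph headers would be grouped here when needed)
#include "ngraph_bridge/ie_backend_engine.h"  // bridge
#include "ngraph_bridge/ie_utils.h"           // bridge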


namespace tensorflow {
namespace ngraph_bridge {

IE_Backend_Engine::IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network,
std::string device)
: m_network(ie_network),
m_func(ie_network.getFunction()),
m_device(device),
m_multi_req_execution(false),
m_network_ready(false) {
if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) {
auto& name = m_network.getName();
m_network.serialize(name + ".xml", name + ".bin");
}
}

IE_Backend_Engine::~IE_Backend_Engine() {}

void IE_Backend_Engine::load_network() {
if (m_network_ready) return;

std::map<std::string, std::string> config;

if (m_device == "MYRIAD") {
// Set MYRIAD configurations
if (IE_Utils::VPUConfigEnabled()) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
}

if (IE_Utils::VPUFastCompileEnabled()) {
config["MYRIAD_HW_INJECT_STAGES"] = "NO";
config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
}
}

InferenceEngine::Core ie;
// Load network to the plugin (m_device)
m_exe_network = ie.LoadNetwork(m_network, m_device, config);
m_network_ready = true;
}

void IE_Backend_Engine::start_async_inference(const int req_id) {
// Start Async inference
try {
m_infer_reqs[req_id].StartAsync();
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
}
}

void IE_Backend_Engine::complete_async_inference(const int req_id) {
// Wait for Async inference completion
try {
m_infer_reqs[req_id].Wait(
InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
}
}

size_t IE_Backend_Engine::getOutputBatchSize(size_t inputBatchSize) const {
return m_network.getBatchSize() *
IE_Utils::GetNumRequests(inputBatchSize, m_device);
}

// Enables multi request execution if the execution engine supports it
void IE_Backend_Engine::enable_multi_req_execution() {
m_multi_req_execution = true;
}
// Disables multi request execution
void IE_Backend_Engine::disable_multi_req_execution() {
m_multi_req_execution = false;
}

std::shared_ptr<ngraph::Function> IE_Backend_Engine::get_func() {
return m_func;
}
}
}
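As a worked example of the getOutputBatchSize() product above, with hypothetical values and assuming IE_Utils::GetNumRequests (not shown in this diff) splits the incoming batch across requests sized to the network's batch:

// Hypothetical values only:
//   m_network.getBatchSize()           == 8   (per-request batch)
//   IE_Utils::GetNumRequests(64, dev)  == 8   (64 inputs over 8 per request)
//   getOutputBatchSize(64)             == 8 * 8 == 64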
72 changes: 72 additions & 0 deletions ngraph_bridge/ie_backend_engine.h
@@ -0,0 +1,72 @@
/*******************************************************************************
* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef IE_BACKEND_ENGINE_H_
#define IE_BACKEND_ENGINE_H_

#include <ie_core.hpp>
#include <memory>
#include <string>
#include <vector>
#include "ngraph_bridge/ie_tensor.h"

Contributor: Same as above

namespace tensorflow {
namespace ngraph_bridge {

class IE_Backend_Engine {
public:
IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network, std::string device);
~IE_Backend_Engine();

// Executes the inference
virtual void infer(std::vector<std::shared_ptr<IETensor>>& inputs,
std::vector<std::string>& input_names,
std::vector<std::shared_ptr<IETensor>>& outputs,
std::vector<std::string>& output_names,
std::vector<std::shared_ptr<IETensor>>& hoisted_params,
std::vector<std::string>& param_names) = 0;

// Returns output batch size based on the input batch size and the device
// FIXME: This may not be needed
virtual size_t getOutputBatchSize(size_t inputBatchSize) const;

// Enables multi request execution if the execution engine supports it
void enable_multi_req_execution();
// Disables multi request execution
void disable_multi_req_execution();

// Returns the NGraph Function from the CNNNetwork
std::shared_ptr<ngraph::Function> get_func();

virtual const std::vector<size_t> get_output_shape(const int i) = 0;

protected:
InferenceEngine::CNNNetwork m_network;
std::shared_ptr<ngraph::Function> m_func;
std::vector<InferenceEngine::InferRequest> m_infer_reqs;
std::string m_device;
bool m_multi_req_execution;
InferenceEngine::ExecutableNetwork m_exe_network;
bool m_network_ready;

virtual void start_async_inference(const int req_id);
virtual void complete_async_inference(const int req_id);
virtual void load_network();
};
}
}

#endif // IE_BACKEND_ENGINE_H_
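To make the abstract interface above concrete, here is a hypothetical minimal subclass. It is not the PR's IE_Basic_Engine or IE_VADM_Engine (those sources are outside this diff); it only sketches one way the protected helpers could be composed into infer():

#include "ngraph_bridge/ie_backend_engine.h"

namespace tensorflow {
namespace ngraph_bridge {

// Hypothetical single-request engine, for illustration only.
class IE_Sketch_Engine : public IE_Backend_Engine {
 public:
  using IE_Backend_Engine::IE_Backend_Engine;

  void infer(std::vector<std::shared_ptr<IETensor>>& inputs,
             std::vector<std::string>& input_names,
             std::vector<std::shared_ptr<IETensor>>& outputs,
             std::vector<std::string>& output_names,
             std::vector<std::shared_ptr<IETensor>>& hoisted_params,
             std::vector<std::string>& param_names) override {
    load_network();  // compiles m_network for m_device on first use
    if (m_infer_reqs.empty()) {
      m_infer_reqs.push_back(m_exe_network.CreateInferRequest());
    }
    // Bind inputs, hoisted parameters, and pre-allocated outputs by name.
    for (size_t i = 0; i < inputs.size(); i++) {
      if (inputs[i]) m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob());
    }
    for (size_t i = 0; i < hoisted_params.size(); i++) {
      if (hoisted_params[i])
        m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob());
    }
    for (size_t i = 0; i < outputs.size(); i++) {
      if (outputs[i])
        m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob());
    }
    // Run one request asynchronously and wait for it to finish.
    start_async_inference(0);
    complete_async_inference(0);
  }

  const std::vector<size_t> get_output_shape(const int i) override {
    // Read the shape straight off the nGraph function's i-th result.
    return m_func->get_results()[i]->get_shape();
  }
};

}  // namespace ngraph_bridge
}  // namespace tensorflow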