diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt index bd78abf0d..dd149bb79 100644 --- a/ngraph_bridge/CMakeLists.txt +++ b/ngraph_bridge/CMakeLists.txt @@ -49,6 +49,9 @@ set(SRC tf_graphcycles.cc tf_deadness_analysis.cc version.cc + ie_backend_engine.cc + ie_basic_engine.cc + ie_vadm_engine.cc ) message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}") diff --git a/ngraph_bridge/executable.cc b/ngraph_bridge/executable.cc index e9c9da63d..dce744ab9 100644 --- a/ngraph_bridge/executable.cc +++ b/ngraph_bridge/executable.cc @@ -14,15 +14,18 @@ // limitations under the License. //***************************************************************************** +#include + #include "ngraph/ngraph.hpp" #include "ngraph/opsets/opset.hpp" -#include - #include "logging/ngraph_log.h" #include "ngraph_bridge/default_opset.h" #include "ngraph_bridge/executable.h" +#include "ngraph_bridge/ie_basic_engine.h" #include "ngraph_bridge/ie_tensor.h" +#include "ngraph_bridge/ie_utils.h" +#include "ngraph_bridge/ie_vadm_engine.h" #include "ngraph_bridge/ngraph_utils.h" using namespace std; @@ -138,22 +141,25 @@ Executable::Executable(shared_ptr func, string device) "ie_" + m_device + "_" + name; } - NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device; - - // Load network to the plugin (m_device) and create an infer request - InferenceEngine::ExecutableNetwork exe_network = - ie.LoadNetwork(m_network, m_device, options); - m_infer_req = exe_network.CreateInferRequest(); + NGRAPH_VLOG(2) << "Creating IE Execution Engine"; + if (m_device == "HDDL") { + m_ie_engine = make_shared(m_network); + } else { + m_ie_engine = make_shared(m_network, m_device); + } } bool Executable::call(const vector>& inputs, - vector>& outputs) { + vector>& outputs, + bool multi_req_execution) { if (m_trivial_fn) { NGRAPH_VLOG(2) << "Calling trivial IE function with inputs=" << inputs.size() << " outputs=" << outputs.size(); return call_trivial(inputs, outputs); } + shared_ptr func = m_ie_engine->get_func(); + // Check if the number of inputs that the CNN network expects is equal to the // sum of the // inputs specified and the inputs we hoisted, if any. 
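[Usage note, not part of the patch] A minimal sketch of how the reworked Executable is expected to be driven after this change. The tensor element type (ngraph::runtime::Tensor) and the "CPU" device string are assumptions taken from the surrounding code; only the new third argument to call() is introduced by this patch.

// Sketch only: `func` is an already-built std::shared_ptr<ngraph::Function>.
// The constructor now selects IE_VADM_Engine for "HDDL" and IE_Basic_Engine
// for every other device string.
auto exe = std::make_shared<tensorflow::ngraph_bridge::Executable>(func, "CPU");

std::vector<std::shared_ptr<ngraph::runtime::Tensor>> inputs;   // populated by the caller
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> outputs(
    exe->get_results().size());  // nullptr entries are allocated by the engine

// The new flag defaults to false, so existing callers keep single-request
// behaviour; passing true asks the engine to split the batch across several
// infer requests (only the HDDL/VADM engine acts on it).
exe->call(inputs, outputs, /*multi_req_execution=*/true);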
@@ -166,7 +172,8 @@ bool Executable::call(const vector>& inputs, } // Prepare input blobs - auto func = m_network.getFunction(); + std::vector> ie_inputs(inputs.size()); + std::vector input_names(inputs.size()); auto parameters = func->get_parameters(); int j = 0; for (int i = 0; i < inputs.size(); i++) { @@ -179,18 +186,23 @@ bool Executable::call(const vector>& inputs, NGRAPH_VLOG(1) << "Skipping unused input " << input_name; continue; } - shared_ptr tv = static_pointer_cast(inputs[i]); - m_infer_req.SetBlob(input_name, tv->get_blob()); + ie_inputs[i] = nullptr; + ie_inputs[i] = static_pointer_cast(inputs[i]); + input_names[i] = input_name; } + std::vector> ie_hoisted_params( + m_hoisted_params.size()); + std::vector param_names(m_hoisted_params.size()); for (const auto& it : m_hoisted_params) { auto input_name = it.first; if (input_info.find(input_name) == input_info.end()) { NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name; continue; } - shared_ptr tv = static_pointer_cast(it.second); - m_infer_req.SetBlob(input_name, tv->get_blob()); + ie_hoisted_params[j] = nullptr; + ie_hoisted_params[j] = static_pointer_cast(it.second); + param_names[j++] = input_name; } InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo(); @@ -213,22 +225,26 @@ bool Executable::call(const vector>& inputs, // Prepare output blobs auto results = func->get_results(); + std::vector> ie_outputs(outputs.size()); + std::vector output_names(outputs.size()); for (int i = 0; i < results.size(); i++) { if (outputs[i] != nullptr) { - NGRAPH_VLOG(4) << "Executable::call() SetBlob()"; - shared_ptr tv = static_pointer_cast(outputs[i]); - m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob()); + ie_outputs[i] = static_pointer_cast(outputs[i]); } + output_names[i] = get_output_name(results[i]); + } + + if (multi_req_execution) { + m_ie_engine->enable_multi_req_execution(); } - m_infer_req.Infer(); + m_ie_engine->infer(ie_inputs, input_names, ie_outputs, output_names, + ie_hoisted_params, param_names); // Set dynamic output blobs for (int i = 0; i < results.size(); i++) { if (outputs[i] == nullptr) { - NGRAPH_VLOG(4) << "Executable::call() GetBlob()"; - auto blob = m_infer_req.GetBlob(get_output_name(results[i])); - outputs[i] = make_shared(blob); + outputs[i] = ie_outputs[i]; } } diff --git a/ngraph_bridge/executable.h b/ngraph_bridge/executable.h index 124233a5b..962a90c3f 100644 --- a/ngraph_bridge/executable.h +++ b/ngraph_bridge/executable.h @@ -21,7 +21,9 @@ #include #include + #include "ngraph/ngraph.hpp" +#include "ngraph_bridge/ie_backend_engine.h" using namespace std; @@ -35,12 +37,21 @@ class Executable { Executable(shared_ptr func, string device); ~Executable() {} bool call(const vector>& inputs, - vector>& outputs); + vector>& outputs, + bool multi_req_execution = false); const ngraph::ResultVector& get_results() { return m_function->get_results(); }; + const vector get_output_shape(const int i) { + if (m_trivial_fn) { + return get_results()[i]->get_shape(); + } else { + return m_ie_engine->get_output_shape(i); + } + } + private: bool call_trivial(const vector>& inputs, vector>& outputs); @@ -56,6 +67,7 @@ class Executable { shared_ptr m_trivial_fn; // This is the original nGraph function corresponding to this executable shared_ptr m_function; + shared_ptr m_ie_engine; }; } } diff --git a/ngraph_bridge/ie_backend_engine.cc b/ngraph_bridge/ie_backend_engine.cc new file mode 100644 index 000000000..b9325d750 --- /dev/null +++ b/ngraph_bridge/ie_backend_engine.cc @@ -0,0 
+1,104 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include + +#include "ngraph_bridge/ie_backend_engine.h" +#include "ngraph_bridge/ie_utils.h" + +namespace tensorflow { +namespace ngraph_bridge { + +IE_Backend_Engine::IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network, + std::string device) + : m_network(ie_network), + m_func(ie_network.getFunction()), + m_device(device), + m_multi_req_execution(false), + m_network_ready(false) { + if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) { + auto& name = m_network.getName(); + m_network.serialize(name + ".xml", name + ".bin"); + } +} + +IE_Backend_Engine::~IE_Backend_Engine() {} + +void IE_Backend_Engine::load_network() { + if (m_network_ready) return; + + std::map config; + + if (m_device == "MYRIAD") { + // Set MYRIAD configurations + if (IE_Utils::VPUConfigEnabled()) { + config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO"; + } + + if (IE_Utils::VPUFastCompileEnabled()) { + config["MYRIAD_HW_INJECT_STAGES"] = "NO"; + config["MYRIAD_COPY_OPTIMIZATION"] = "NO"; + } + } + + InferenceEngine::Core ie; + // Load network to the plugin (m_device) + m_exe_network = ie.LoadNetwork(m_network, m_device, config); + m_network_ready = true; +} + +void IE_Backend_Engine::start_async_inference(const int req_id) { + // Start Async inference + try { + m_infer_reqs[req_id].StartAsync(); + } catch (InferenceEngine::details::InferenceEngineException e) { + THROW_IE_EXCEPTION << "Couldn't start Inference: "; + } catch (...) { + THROW_IE_EXCEPTION << "Couldn't start Inference: "; + } +} + +void IE_Backend_Engine::complete_async_inference(const int req_id) { + // Wait for Async inference completion + try { + m_infer_reqs[req_id].Wait( + InferenceEngine::IInferRequest::WaitMode::RESULT_READY); + } catch (InferenceEngine::details::InferenceEngineException e) { + THROW_IE_EXCEPTION << " Exception with completing Inference: "; + } catch (...) 
{
+    THROW_IE_EXCEPTION << " Exception with completing Inference: ";
+  }
+}
+
+size_t IE_Backend_Engine::get_output_batch_size(size_t inputBatchSize) const {
+  return m_network.getBatchSize() *
+         IE_Utils::GetNumRequests(inputBatchSize, m_device);
+}
+
+// Enables multi request execution if the execution engine supports it
+void IE_Backend_Engine::enable_multi_req_execution() {
+  m_multi_req_execution = true;
+}
+// Disables multi request execution
+void IE_Backend_Engine::disable_multi_req_execution() {
+  m_multi_req_execution = false;
+}
+
+std::shared_ptr<ngraph::Function> IE_Backend_Engine::get_func() {
+  return m_func;
+}
+}
+}
diff --git a/ngraph_bridge/ie_backend_engine.h b/ngraph_bridge/ie_backend_engine.h
new file mode 100644
index 000000000..d8724389a
--- /dev/null
+++ b/ngraph_bridge/ie_backend_engine.h
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#ifndef IE_BACKEND_ENGINE_H_
+#define IE_BACKEND_ENGINE_H_
+
+#include
+#include
+#include
+
+#include
+
+#include "ngraph_bridge/ie_tensor.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IE_Backend_Engine {
+ public:
+  IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IE_Backend_Engine();
+
+  // Executes the inference
+  virtual void infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names) = 0;
+
+  // Returns output batch size based on the input batch size and the device
+  // FIXME: This may not be needed
+  virtual size_t get_output_batch_size(size_t inputBatchSize) const;
+
+  // Enables multi request execution if the execution engine supports it
+  void enable_multi_req_execution();
+  // Disables multi request execution
+  void disable_multi_req_execution();
+
+  // Returns the NGraph Function from the CNNNetwork
+  std::shared_ptr<ngraph::Function> get_func();
+
+  virtual const std::vector<size_t> get_output_shape(const int i) = 0;
+
+ protected:
+  InferenceEngine::CNNNetwork m_network;
+  std::shared_ptr<ngraph::Function> m_func;
+  std::vector<InferenceEngine::InferRequest> m_infer_reqs;
+  std::string m_device;
+  bool m_multi_req_execution;
+  InferenceEngine::ExecutableNetwork m_exe_network;
+  bool m_network_ready;
+
+  virtual void start_async_inference(const int req_id);
+  virtual void complete_async_inference(const int req_id);
+  virtual void load_network();
+};
+}
+}
+
+#endif  // IE_BACKEND_ENGINE_H_
diff --git a/ngraph_bridge/ie_basic_engine.cc b/ngraph_bridge/ie_basic_engine.cc
new file mode 100644
index 000000000..f5db5d114
--- /dev/null
+++ b/ngraph_bridge/ie_basic_engine.cc
@@ -0,0 +1,80 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include + +#include "logging/ngraph_log.h" +#include "ngraph_bridge/ie_basic_engine.h" +#include "ngraph_bridge/ie_utils.h" + +namespace tensorflow { +namespace ngraph_bridge { + +IE_Basic_Engine::IE_Basic_Engine(InferenceEngine::CNNNetwork ie_network, + std::string device) + : IE_Backend_Engine(ie_network, device) {} + +IE_Basic_Engine::~IE_Basic_Engine() {} + +void IE_Basic_Engine::infer( + std::vector>& inputs, + std::vector& input_names, + std::vector>& outputs, + std::vector& output_names, + std::vector>& hoisted_params, + std::vector& param_names) { + load_network(); + if (m_infer_reqs.empty()) { + m_infer_reqs.push_back(m_exe_network.CreateInferRequest()); + } + + // Prepare input blobs + auto func = m_network.getFunction(); + auto parameters = func->get_parameters(); + for (int i = 0; i < inputs.size(); i++) { + if (inputs[i] != nullptr) + m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob()); + } + + for (int i = 0; i < hoisted_params.size(); i++) { + if (hoisted_params[i] != nullptr) + m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob()); + } + + // Prepare output blobs + auto results = func->get_results(); + for (int i = 0; i < results.size(); i++) { + if (outputs[i] != nullptr) { + NGRAPH_VLOG(4) << "Executable::call() SetBlob()"; + m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob()); + } + } + + m_infer_reqs[0].Infer(); + + // Set dynamic output blobs + for (int i = 0; i < results.size(); i++) { + if (outputs[i] == nullptr) { + NGRAPH_VLOG(4) << "Executable::call() GetBlob()"; + auto blob = m_infer_reqs[0].GetBlob(output_names[i]); + outputs[i] = std::make_shared(blob); + } + } + + // return true; +} +} +} diff --git a/ngraph_bridge/ie_basic_engine.h b/ngraph_bridge/ie_basic_engine.h new file mode 100644 index 000000000..d6dc295e6 --- /dev/null +++ b/ngraph_bridge/ie_basic_engine.h @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/
+
+#ifndef IE_BASIC_ENGINE_H_
+#define IE_BASIC_ENGINE_H_
+
+#include
+#include
+#include
+
+#include
+
+#include "ngraph_bridge/ie_backend_engine.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IE_Basic_Engine : public IE_Backend_Engine {
+ public:
+  IE_Basic_Engine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IE_Basic_Engine();
+
+  // Executes the inference
+  virtual void infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names);
+
+  virtual const std::vector<size_t> get_output_shape(const int i) {
+    return m_func->get_results()[i]->get_shape();
+  };
+};
+}
+}
+
+#endif  // IE_BASIC_ENGINE_H_
diff --git a/ngraph_bridge/ie_utils.h b/ngraph_bridge/ie_utils.h
new file mode 100644
index 000000000..4ca82aeae
--- /dev/null
+++ b/ngraph_bridge/ie_utils.h
@@ -0,0 +1,109 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+// Utility helpers shared by the IE backend engines: request-count and
+// per-request batch-size computation, VPU configuration switches, and
+// MemoryBlob creation.
+
+#ifndef IE_UTILS_H_
+#define IE_UTILS_H_
+
+#include
+#include
+#include
+#include
+
+#include
+
+class IE_Utils {
+ public:
+  // Returns the maximum number of requests based on the device.
+  // TODO: The number of requests is hardcoded temporarily.
+  // This should dynamically look at the underlying architecture
+  // and compute the best performing number of requests.
+  static size_t GetMaxReq(std::string device) {
+    int max_req = 1;
+    if (device == "HDDL") max_req = 8;
+    return max_req;
+  }
+
+  // Computes the input batch size per request based on the actual input
+  // batch size and the device.
+ static size_t GetInputBatchSize(size_t inputBatchSize, std::string device) { + int max_req = IE_Utils::GetMaxReq(device); + return ((inputBatchSize + max_req - 1) / max_req); + } + + // Gets the actual number of requests + static size_t GetNumRequests(size_t inputBatchSize, std::string device) { + return inputBatchSize / GetInputBatchSize(inputBatchSize, device); + } + + static bool VPUConfigEnabled() { return true; } + + static bool VPUFastCompileEnabled() { return true; } + + // Creates a MemoryBlob for InferenceEngine + static void CreateBlob(InferenceEngine::TensorDesc& desc, + InferenceEngine::Precision& precision, + const void* data_ptr, size_t byte_size, + InferenceEngine::MemoryBlob::Ptr& blob_ptr) { +#define MAKE_IE_BLOB(type_, desc_, ptr_, size_) \ + do { \ + if (ptr_ == nullptr) { \ + blob_ptr = std::make_shared>(desc); \ + blob_ptr->allocate(); \ + } else { \ + blob_ptr = std::make_shared>( \ + desc, (type_*)ptr_, size_); \ + } \ + } while (0) + switch (precision) { + case InferenceEngine::Precision::FP32: + MAKE_IE_BLOB(float, desc, (float*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::U8: + MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I8: + MAKE_IE_BLOB(int8_t, desc, (int8_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::U16: + MAKE_IE_BLOB(uint16_t, desc, (uint16_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I16: + MAKE_IE_BLOB(int16_t, desc, (int16_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I32: + MAKE_IE_BLOB(int32_t, desc, (int32_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::U64: + MAKE_IE_BLOB(uint64_t, desc, (uint64_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::I64: + MAKE_IE_BLOB(int64_t, desc, (int64_t*)data_ptr, byte_size); + break; + case InferenceEngine::Precision::BOOL: + MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size); + break; + default: + THROW_IE_EXCEPTION << "Can't create IE blob for type " + << precision.name(); + } + } +}; + +#endif +// IE_UTILS_H diff --git a/ngraph_bridge/ie_vadm_engine.cc b/ngraph_bridge/ie_vadm_engine.cc new file mode 100644 index 000000000..b2b35371e --- /dev/null +++ b/ngraph_bridge/ie_vadm_engine.cc @@ -0,0 +1,165 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#include + +#include "ngraph_bridge/ie_utils.h" +#include "ngraph_bridge/ie_vadm_engine.h" + +namespace tensorflow { +namespace ngraph_bridge { + +IE_VADM_Engine::IE_VADM_Engine(InferenceEngine::CNNNetwork ie_network) + : IE_Backend_Engine(ie_network, "HDDL"), + m_orig_batch_size(ie_network.getBatchSize()) {} + +IE_VADM_Engine::~IE_VADM_Engine() {} + +void IE_VADM_Engine::infer( + std::vector>& inputs, + std::vector& input_names, + std::vector>& outputs, + std::vector& output_names, + std::vector>& hoisted_params, + std::vector& param_names) { + // Batch size is 0 and the number of requests is 1 when + // multi request execution is disabled. + int num_req = 1; + int batch_size = 0; + + if (m_multi_req_execution && inputs.size() == 1 && inputs[0] != nullptr && + inputs[0]->get_blob()->getTensorDesc().getDims().size() > 1) { + // Set the batch size per request and number of requests + batch_size = IE_Utils::GetInputBatchSize( + inputs[0]->get_blob()->getTensorDesc().getDims()[0], m_device); + num_req = inputs[0]->get_blob()->getTensorDesc().getDims()[0] / batch_size; + if (m_network.getBatchSize() != batch_size) + m_network.setBatchSize(batch_size); + } + + // Create requests + load_network(); + while (m_infer_reqs.size() < num_req) { + m_infer_reqs.push_back(m_exe_network.CreateInferRequest()); + } + std::vector in_blobs(inputs.size() * + num_req); + std::vector param_blobs( + hoisted_params.size()); + std::vector out_blobs(outputs.size() * + num_req); + // Prepare input blobs + for (int i = 0; i < inputs.size(); i++) { + if (inputs[i] == nullptr) continue; + InferenceEngine::TensorDesc desc = inputs[i]->get_blob()->getTensorDesc(); + InferenceEngine::Precision prec = desc.getPrecision(); + const void* input_data_pointer = inputs[i]->get_data_ptr(); + std::string input_name = input_names[i]; + size_t size = inputs[i]->get_blob()->byteSize(); + + InferenceEngine::SizeVector req_shape(desc.getDims()); + if (batch_size != 0) { + req_shape[0] = batch_size; + desc.setDims(req_shape); + } + for (int j = 0; j < num_req; j++) { + size_t req_size = size / num_req; + const void* data_ptr = + (void*)((uint64_t)(input_data_pointer) + req_size * j); + int in_idx = i * num_req + j; + IE_Utils::CreateBlob(desc, prec, data_ptr, req_size, in_blobs[in_idx]); + m_infer_reqs[j].SetBlob(input_name, in_blobs[in_idx]); + } + } + for (int i = 0; i < hoisted_params.size(); i++) { + if (hoisted_params[i] == nullptr) continue; + InferenceEngine::TensorDesc desc = + hoisted_params[i]->get_blob()->getTensorDesc(); + InferenceEngine::Precision prec = desc.getPrecision(); + const void* param_data_pointer = hoisted_params[i]->get_data_ptr(); + std::string param_name = param_names[i]; + size_t size = hoisted_params[i]->get_blob()->byteSize(); + + InferenceEngine::SizeVector req_shape(desc.getDims()); + IE_Utils::CreateBlob(desc, prec, param_data_pointer, size, param_blobs[i]); + for (int j = 0; j < num_req; j++) { + m_infer_reqs[j].SetBlob(param_name, param_blobs[i]); + } + } + + // Prepare output blobs + for (int i = 0; i < outputs.size(); i++) { + out_blobs[i] = nullptr; + if (outputs[i] != nullptr) { + InferenceEngine::TensorDesc desc = + outputs[i]->get_blob()->getTensorDesc(); + InferenceEngine::Precision prec = desc.getPrecision(); + InferenceEngine::Layout layout = desc.getLayout(); + const void* output_data_pointer = outputs[i]->get_data_ptr(); + std::string output_name = output_names[i]; + size_t size = 
outputs[i]->get_blob()->byteSize(); + + InferenceEngine::SizeVector req_shape(desc.getDims()); + if (batch_size != 0) { + req_shape[0] = batch_size; + desc.setDims(req_shape); + } + + InferenceEngine::TensorDesc req_desc(prec, req_shape, layout); + for (int j = 0; j < num_req; j++) { + size_t req_size = size / num_req; + const void* data_ptr = + (void*)((uint64_t)(output_data_pointer) + req_size * j); + int out_idx = i * num_req + j; + IE_Utils::CreateBlob(req_desc, prec, data_ptr, req_size, + out_blobs[out_idx]); + m_infer_reqs[j].SetBlob(output_name, out_blobs[out_idx]); + } + } + } + + // Start Inference Requests + for (int i = 0; i < num_req; i++) { + start_async_inference(i); + } + // Complete Inference Requests + for (int i = 0; i < num_req; i++) { + complete_async_inference(i); + } + + // Set dynamic output blobs + for (int i = 0; i < outputs.size(); i++) { + if (outputs[i] == nullptr) { + auto blob = InferenceEngine::as( + m_infer_reqs[0].GetBlob(output_names[i])); + outputs[i] = std::make_shared(blob); + } + } + + for (int i = 0; i < in_blobs.size(); i++) { + in_blobs[i]->deallocate(); + } + for (int i = 0; i < out_blobs.size(); i++) { + if (out_blobs[i] != nullptr) { + out_blobs[i]->deallocate(); + } + } + for (int i = 0; i < param_blobs.size(); i++) { + param_blobs[i]->deallocate(); + } +} +} +} diff --git a/ngraph_bridge/ie_vadm_engine.h b/ngraph_bridge/ie_vadm_engine.h new file mode 100644 index 000000000..b3851238d --- /dev/null +++ b/ngraph_bridge/ie_vadm_engine.h @@ -0,0 +1,58 @@ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#ifndef IE_VADM_ENGINE_H_ +#define IE_VADM_ENGINE_H_ + +#include +#include +#include + +#include + +#include "ngraph_bridge/ie_backend_engine.h" + +namespace tensorflow { +namespace ngraph_bridge { + +class IE_VADM_Engine : public IE_Backend_Engine { + public: + IE_VADM_Engine(InferenceEngine::CNNNetwork ie_network); + ~IE_VADM_Engine(); + + // Executes the inference + virtual void infer(std::vector>& inputs, + std::vector& input_names, + std::vector>& outputs, + std::vector& output_names, + std::vector>& hoisted_params, + std::vector& param_names); + + virtual const std::vector get_output_shape(const int i) { + std::vector shape = m_func->get_results()[i]->get_shape(); + if (m_multi_req_execution && shape.size() > 1) { + shape[0] = m_orig_batch_size; + } + return shape; + }; + + private: + int m_orig_batch_size; +}; +} +} + +#endif // IE_VADM_ENGINE_H_ diff --git a/ngraph_bridge/kernels/ngraph_encapsulate_op.cc b/ngraph_bridge/kernels/ngraph_encapsulate_op.cc index 74f89580f..f05e23feb 100644 --- a/ngraph_bridge/kernels/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/kernels/ngraph_encapsulate_op.cc @@ -207,6 +207,13 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { int time_func_create_or_lookup; Timer function_lookup_or_create; + bool multi_req_execution = false; + if (std::getenv("NGRAPH_TF_ENABLE_BATCHING") && + NGraphClusterManager::NumberOfClusters() == 1) { + NGRAPH_VLOG(2) << "Batching is enabled" << name(); + multi_req_execution = true; + } + // TF input tensor std::vector tf_input_tensors; std::shared_ptr ng_exec; @@ -278,7 +285,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { } // Create the TF output tensor - auto ng_shape = ng_element->get_shape(); + auto ng_shape = ng_exec->get_output_shape(i); TensorShape tf_shape; for (auto dim : ng_shape) { tf_shape.AddDim(dim); @@ -315,7 +322,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { << "NGraphEncapsulateOp::Compute call starting for cluster " << m_cluster_id; try { - ng_exec->call(ng_inputs, ng_outputs); + ng_exec->call(ng_inputs, ng_outputs, multi_req_execution); } catch (const std::exception& exp) { string status_string = "Caught exception while executing cluster " + to_string(m_cluster_id) + ": " + diff --git a/ngraph_bridge/ngraph_cluster_manager.cc b/ngraph_bridge/ngraph_cluster_manager.cc index 4555c3e6d..cb6aecc20 100644 --- a/ngraph_bridge/ngraph_cluster_manager.cc +++ b/ngraph_bridge/ngraph_cluster_manager.cc @@ -37,6 +37,10 @@ GraphDef* NGraphClusterManager::GetClusterGraph(size_t idx) { return idx < s_cluster_graphs.size() ? s_cluster_graphs[idx] : nullptr; } +size_t NGraphClusterManager::NumberOfClusters() { + return s_cluster_graphs.size(); +} + void NGraphClusterManager::EvictAllClusters() { s_cluster_graphs.clear(); } } // namespace ngraph_bridge diff --git a/ngraph_bridge/ngraph_cluster_manager.h b/ngraph_bridge/ngraph_cluster_manager.h index bcb41fa1b..9008521e5 100644 --- a/ngraph_bridge/ngraph_cluster_manager.h +++ b/ngraph_bridge/ngraph_cluster_manager.h @@ -29,6 +29,7 @@ class NGraphClusterManager { static size_t NewCluster(); static tensorflow::GraphDef* GetClusterGraph(size_t idx); static void EvictAllClusters(); + static size_t NumberOfClusters(); private: static std::vector s_cluster_graphs;
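[Reviewer note, not part of the patch] The batching path is enabled end to end only when the NGRAPH_TF_ENABLE_BATCHING environment variable is set and exactly one cluster exists; NGraphEncapsulateOp then passes multi_req_execution=true into Executable::call(), and IE_VADM_Engine splits the input batch across infer requests using the helpers in ie_utils.h. A self-contained sketch of that arithmetic (the helper bodies mirror the patch; the main() harness and the example batch sizes are illustrative only):

#include <cstddef>
#include <iostream>
#include <string>

// Mirror of IE_Utils::GetMaxReq / GetInputBatchSize / GetNumRequests above.
static size_t GetMaxReq(const std::string& device) {
  return device == "HDDL" ? 8 : 1;
}
static size_t GetInputBatchSize(size_t batch, const std::string& device) {
  size_t max_req = GetMaxReq(device);
  return (batch + max_req - 1) / max_req;  // ceiling division
}
static size_t GetNumRequests(size_t batch, const std::string& device) {
  return batch / GetInputBatchSize(batch, device);
}

int main() {
  // Batch 16 on HDDL: 8 requests of 2 samples each.
  std::cout << GetNumRequests(16, "HDDL") << " x "
            << GetInputBatchSize(16, "HDDL") << std::endl;
  // Batch 20 on HDDL: 6 requests of 3 samples cover only 18 of the 20
  // samples; how non-divisible batches should behave may be worth
  // confirming during review.
  std::cout << GetNumRequests(20, "HDDL") << " x "
            << GetInputBatchSize(20, "HDDL") << std::endl;
  return 0;
}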