Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft : Add infra to generate extra tensor #13486

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions runtime/onert/backend/train/BackendContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,60 @@ FunctionMap BackendContext::gen()
// fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
// }

planLayerScopeTensors(fn_map);
_tensor_builder->allocateLayerScope();

return fn_map;
}

void BackendContext::planLayerScopeTensors(const FunctionMap &fn_map)
{
  // Collect the LayerScopeTensors requested by each trainable function,
  // register them to the tensor builder, then plan their lifetimes.
  const auto &ops = trainable_graph()->operations();

  // Registers each tensor of `tensors` under a (op_idx, i) LayerScopeTensorIndex.
  auto register_tensors = [this, &ops](ir::OperationIndex op_idx,
                                       std::optional<LayerScopeTensors> &&tensors) {
    if (not tensors.has_value())
      return;

    // NOTE Bind by reference: copying the vector would copy every shared_ptr
    auto &ls_tensors = tensors.value();
    for (auto i = 0u; i < ls_tensors.size(); ++i)
    {
      LayerScopeTensorIndex tensor_idx(op_idx, i);
      _tensor_builder->registerLayerScopeTensor(tensor_idx, ls_tensors[i]);

      VERBOSE() << "register (idx:" << tensor_idx << ") requested from " << op_idx << "_"
                << ops.at(op_idx).name() << std::endl;
    }
  };

  for (const auto &pair : fn_map)
  {
    const auto &op_idx = pair.first;
    auto &fn_seq = pair.second;

    const ir::IOperation *op = &ops.at(op_idx);
    const auto trainable_op = dynamic_cast<const ir::train::TrainableOperation *>(op);
    assert(trainable_op != nullptr);

    // LayerScopeTensors are only used while computing gradients, so operations
    // that never run backward need no registration.
    if (not trainable_op->isRequiredForBackward())
      continue;

    VERBOSE(LayerScopeTensor) << "register tensor for " << trainable_op->name() << std::endl;

    fn_seq->iterate([&](exec::train::ITrainableFunction &fn) {
      register_tensors(op_idx, fn.registerLayerScopeTensors());
    });
  }

  // Plan first/last use of the registered tensors over the training order
  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planLayerScopeTensors(_tensor_builder.get());
}

void BackendContext::planForwardTensors()
{
const auto &tgraph = *trainable_graph();
Expand Down
1 change: 1 addition & 0 deletions runtime/onert/backend/train/BackendContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class BackendContext : public onert::backend::train::TrainableBackendContext
private:
void planForwardTensors();
void planBackwardTensors();
void planLayerScopeTensors(const FunctionMap &fn_map);

public:
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
Expand Down
56 changes: 56 additions & 0 deletions runtime/onert/backend/train/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ void TensorBuilder::registerDisposableBackwardTensorInfo(const DisposableTensorI
_disposable_backprops.add(index);
}

void TensorBuilder::registerLayerScopeTensor(const LayerScopeTensorIndex &index,
                                             std::shared_ptr<LayerScopeTensor> &tensor)
{
  // Track which LayerScopeTensorIndex values belong to each operation.
  // operator[] default-constructs an empty Set on first insertion, so a single
  // lookup replaces the previous find-then-insert (which also copied a
  // locally-built Set into the map).
  auto &indices = _operation_to_layerscope[index.op_index()];
  assert(!indices.contains(index)); // a tensor index must be registered only once
  indices.add(index);

  _tensor_reg->setLayerScopeTensor(index, tensor);
}

void TensorBuilder::notifyFirstUse(const ir::OperandIndex &index)
{
// TODO Support memory plan
Expand Down Expand Up @@ -155,6 +176,16 @@ void TensorBuilder::notifyDisposableBackPropLastUse(const DisposableTensorIndex
_tensor_mgr->releaseDisposableBackPropPlan(index);
}

void TensorBuilder::notifyLayerScopeFirstUse(const LayerScopeTensorIndex &index)
{
  // Claim memory for the tensor at its first use in the planned order
  _tensor_mgr->claimLayerScopePlan(index);
}

void TensorBuilder::notifyLayerScopeLastUse(const LayerScopeTensorIndex &index)
{
  // Release the claimed memory after the tensor's last use
  _tensor_mgr->releaseLayerScopePlan(index);
}

bool TensorBuilder::isRegistered(const ir::OperandIndex &index) const
{
return _tensor_info_map.find(index) != _tensor_info_map.end();
Expand All @@ -170,6 +201,29 @@ bool TensorBuilder::isRegisteredDisposableBackwardTensor(const DisposableTensorI
return _disposable_backprops.contains(index);
}

bool TensorBuilder::isRegisteredLayerScopeTensor(const ir::OperationIndex &index) const
{
  // True when at least one LayerScopeTensor was registered for this operation
  return _operation_to_layerscope.find(index) != _operation_to_layerscope.end();
}

const util::Set<LayerScopeTensorIndex> &
TensorBuilder::getRegisteredLayerScopeTensorIndex(const ir::OperationIndex &index) const
{
  // Precondition: isRegisteredLayerScopeTensor(index) — callers must check first
  const auto it = _operation_to_layerscope.find(index);
  assert(it != _operation_to_layerscope.end());
  return it->second;
}

LayerScopeTensorLifeTime
TensorBuilder::getLayerScopeTensorLifeTime(const LayerScopeTensorIndex &index) const
{
  // Look up the registered tensor in the registry and forward its lifetime
  return _tensor_reg->layerscope_tensors().at(index)->lifetime();
}

void TensorBuilder::allocate(void)
{
_tensor_mgr->allocateNonConstTensors();
Expand All @@ -183,6 +237,8 @@ void TensorBuilder::allocateBackward(void)
_tensor_mgr->allocateDisposableBackPropTensors();
}

void TensorBuilder::allocateLayerScope(void) { _tensor_mgr->allocateLayerScopeTensors(); }

} // namespace train
} // namespace backend
} // namespace onert
14 changes: 14 additions & 0 deletions runtime/onert/backend/train/TensorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
#define __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__

#include "DisposableTensorIndex.h"
#include "LayerScopeTensorIndex.h"
#include "TensorManager.h"
#include "TensorRegistry.h"
#include "util/Set.h"

#include <ir/OperationIndexMap.h>
#include <exec/train/optimizer/Optimizer.h>

namespace onert
Expand Down Expand Up @@ -55,20 +57,31 @@ class TensorBuilder
void registerDisposableBackwardTensorInfo(const DisposableTensorIndex &index,
const ir::OperandInfo &info);

void registerLayerScopeTensor(const LayerScopeTensorIndex &index,
std::shared_ptr<LayerScopeTensor> &info);

// TODO Support memory plan of all tensors
void notifyFirstUse(const ir::OperandIndex &);
void notifyLastUse(const ir::OperandIndex &);
void notifyBackwardFirstUse(const ir::OperandIndex &);
void notifyBackwardLastUse(const ir::OperandIndex &);
void notifyDisposableBackPropFirstUse(const DisposableTensorIndex &);
void notifyDisposableBackPropLastUse(const DisposableTensorIndex &);
void notifyLayerScopeFirstUse(const LayerScopeTensorIndex &);
void notifyLayerScopeLastUse(const LayerScopeTensorIndex &);

bool isRegistered(const ir::OperandIndex &) const;
bool isRegisteredBackward(const ir::OperandIndex &) const;
bool isRegisteredDisposableBackwardTensor(const DisposableTensorIndex &index) const;
bool isRegisteredLayerScopeTensor(const ir::OperationIndex &) const;

const util::Set<LayerScopeTensorIndex> &
getRegisteredLayerScopeTensorIndex(const ir::OperationIndex &) const;
LayerScopeTensorLifeTime getLayerScopeTensorLifeTime(const LayerScopeTensorIndex &) const;

void allocate(void);
void allocateBackward(void);
void allocateLayerScope(void); // <- this has to be called after planning is done

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
Expand All @@ -77,6 +90,7 @@ class TensorBuilder
ir::OperandIndexMap<ir::OperandInfo> _backward_tensor_info_map;
ir::OperandIndexMap<bool> _as_constants;
util::Set<DisposableTensorIndex> _disposable_backprops;
ir::OperationIndexMap<util::Set<LayerScopeTensorIndex>> _operation_to_layerscope;
const exec::train::optimizer::Optimizer *_optimizer;
};

Expand Down
42 changes: 42 additions & 0 deletions runtime/onert/backend/train/TensorPlanner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,48 @@ ir::OperandIndexSequence TensorPlanner::getOutgoingBackPropSeq(const ir::Operati
return ret;
}

void TensorPlanner::planLayerScopeTensors(TensorBuilder *tensor_builder)
{
  // Plan lifetimes of the registered LayerScopeTensors:
  //  - FORWARD_TO_BACKWARD tensors are claimed in forwarding order and
  //    released in backwarding order.
  //  - BACKWARD tensors are claimed and released in backwarding order.

  // forwarding order
  const auto f_order = _tgraph.topolSortOperations();
  for (const auto &op_index : f_order)
  {
    if (not tensor_builder->isRegisteredLayerScopeTensor(op_index))
      continue;

    // NOTE Bind by const ref: the getter returns a reference, copying the Set is wasteful
    const auto &indices = tensor_builder->getRegisteredLayerScopeTensorIndex(op_index);
    for (const auto &idx : indices)
    {
      const auto lt = tensor_builder->getLayerScopeTensorLifeTime(idx);
      if (lt == LayerScopeTensorLifeTime::FORWARD_TO_BACKWARD)
        tensor_builder->notifyLayerScopeFirstUse(idx);
    }
  }

  // backwarding order
  const auto b_order = _tgraph.essentialBackwardOrder();
  for (const auto &op_index : b_order)
  {
    if (not tensor_builder->isRegisteredLayerScopeTensor(op_index))
      continue;

    const auto &indices = tensor_builder->getRegisteredLayerScopeTensorIndex(op_index);
    for (const auto &idx : indices)
    {
      const auto lt = tensor_builder->getLayerScopeTensorLifeTime(idx);
      if (lt == LayerScopeTensorLifeTime::BACKWARD)
        tensor_builder->notifyLayerScopeFirstUse(idx);
    }
    // Release in a second pass so every tensor of this op is claimed before any is released
    for (const auto &idx : indices)
    {
      const auto lt = tensor_builder->getLayerScopeTensorLifeTime(idx);
      if (lt == LayerScopeTensorLifeTime::FORWARD_TO_BACKWARD ||
          lt == LayerScopeTensorLifeTime::BACKWARD)
        tensor_builder->notifyLayerScopeLastUse(idx);
    }
  }
}

} // namespace train
} // namespace backend
} // namespace onert
1 change: 1 addition & 0 deletions runtime/onert/backend/train/TensorPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class TensorPlanner
void planBackPropTensors(TensorBuilder *tensor_builder);
void planGradientTensors(TensorBuilder *tensor_builder);
void planDisposableBackPropTensors(TensorBuilder *tensor_builder);
void planLayerScopeTensors(TensorBuilder *tensor_builder);

private:
ir::OperandIndexSequence getOutgoingBackPropSeq(const ir::OperationIndex &op_index,
Expand Down
15 changes: 13 additions & 2 deletions runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,22 @@ void BinaryArithmeticLayer::configureBackward(IPortableTensor *back_prop_lhs,

if (activation != ir::Activation::NONE)
{
_act_back_prop_output = std::make_unique<Tensor>(_output->get_info());
_act_back_prop_output->setBuffer(std::make_shared<basic::Allocator>(_output->total_size()));
_act_back_prop_output = std::make_shared<LayerScopeTensor>(_back_prop_output->get_info());
}
}

std::optional<LayerScopeTensors> BinaryArithmeticLayer::registerLayerScopeTensors()
{
LayerScopeTensors tensors;

if (_act_back_prop_output != nullptr)
{
tensors.push_back(_act_back_prop_output);
}

return std::optional<LayerScopeTensors>(tensors);
}

void BinaryArithmeticLayer::forward(bool) { cpu::ops::BinaryArithmeticLayer::run(); }

void BinaryArithmeticLayer::backward()
Expand Down
3 changes: 2 additions & 1 deletion runtime/onert/backend/train/ops/BinaryArithmeticLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction,
void configureBackward(IPortableTensor *back_prop_lhs, IPortableTensor *back_prop_rhs,
const IPortableTensor *back_prop_output, const ir::Activation activation,
const ArithmeticType arithmetic_type);
std::optional<LayerScopeTensors> registerLayerScopeTensors() override;
void forward(bool training) override;
void backward() override;

Expand All @@ -60,7 +61,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction,

ArithmeticType _arithmetic_type;
ir::Activation _activation;
std::unique_ptr<BackPropTensor> _act_back_prop_output;
std::shared_ptr<LayerScopeTensor> _act_back_prop_output;
};

} // namespace ops
Expand Down
34 changes: 19 additions & 15 deletions runtime/onert/backend/train/ops/ConvolutionLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace
using namespace onert;

template <typename Tensor>
std::unique_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *origin_weights)
std::shared_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *origin_weights)
{
const auto &origin_shape = origin_weights->getShape();
assert(origin_shape.rank() == 4);
Expand All @@ -42,7 +42,7 @@ std::unique_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *
ir::Shape{origin_shape.dim(1), origin_shape.dim(2), origin_shape.dim(3), origin_shape.dim(0)};
transposed_info.shape(transposed_shape);

return std::make_unique<Tensor>(transposed_info);
return std::make_shared<Tensor>(transposed_info);
}

} // namespace
Expand Down Expand Up @@ -79,27 +79,31 @@ void ConvolutionLayer::configureBackward(const IPortableTensor *weights,
if (_dilationHeightFactor != 1 || _dilationWidthFactor != 1)
throw std::runtime_error("train ConvolutionLayer: Unsupported dilation yet");

// TODO Optimize transposed tensors
_transposed_weights = createTransposedWeights<Tensor>(weights);
_transposed_weights->setBuffer(
std::make_shared<basic::Allocator>(_transposed_weights->total_size()));
_transposed_weights = createTransposedWeights<LayerScopeTensor>(weights);

_conv_back_prop_output = std::make_unique<BackPropTensor>(back_prop_output->get_info());
_conv_back_prop_output->setBuffer(
std::make_shared<basic::Allocator>(_conv_back_prop_output->total_size()));
_conv_back_prop_output = std::make_shared<LayerScopeTensor>(back_prop_output->get_info());

_transposed_grad_weights = createTransposedWeights<GradientTensor>(weights);
_transposed_grad_weights->setBuffer(
std::make_shared<basic::Allocator>(_transposed_grad_weights->total_size()));
_transposed_grad_weights = createTransposedWeights<LayerScopeTensor>(weights);

if (activation != ir::Activation::NONE)
{
_act_back_prop_output = std::make_unique<BackPropTensor>(_back_prop_output->get_info());
_act_back_prop_output->setBuffer(
std::make_shared<basic::Allocator>(_act_back_prop_output->total_size()));
_act_back_prop_output = std::make_unique<LayerScopeTensor>(_back_prop_output->get_info());
}
}

std::optional<LayerScopeTensors> ConvolutionLayer::registerLayerScopeTensors()
{
LayerScopeTensors tensors = {_transposed_weights, _conv_back_prop_output,
_transposed_grad_weights};

if (_act_back_prop_output != nullptr)
{
tensors.push_back(_act_back_prop_output);
}

return std::optional<LayerScopeTensors>(tensors);
}

void ConvolutionLayer::forward(bool) { cpu::ops::ConvolutionLayer::run(); }
void ConvolutionLayer::backward()
{
Expand Down
9 changes: 5 additions & 4 deletions runtime/onert/backend/train/ops/ConvolutionLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction,
void configureBackward(const IPortableTensor *weights, IPortableTensor *back_prop_input,
IPortableTensor *grad_weights, IPortableTensor *grad_bias,
const IPortableTensor *back_prop_output, const ir::Activation activation);
std::optional<LayerScopeTensors> registerLayerScopeTensors() override;
void forward(bool training) override;
void backward() override;

Expand All @@ -54,10 +55,10 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction,
const IPortableTensor *_back_prop_output;

// TODO Consider if these tensors should be built in TensorBuilder
std::unique_ptr<Tensor> _transposed_weights;
std::unique_ptr<BackPropTensor> _conv_back_prop_output;
std::unique_ptr<BackPropTensor> _act_back_prop_output;
std::unique_ptr<GradientTensor> _transposed_grad_weights;
std::shared_ptr<LayerScopeTensor> _transposed_weights;
std::shared_ptr<LayerScopeTensor> _conv_back_prop_output;
std::shared_ptr<LayerScopeTensor> _transposed_grad_weights;
std::shared_ptr<LayerScopeTensor> _act_back_prop_output;
};

} // namespace ops
Expand Down
Loading