add more operators
zetwhite committed Jul 29, 2024
1 parent 9c63187 commit f8dcd0e
Showing 4 changed files with 106 additions and 43 deletions.
77 changes: 64 additions & 13 deletions runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.cc
@@ -55,9 +55,11 @@ void DepthwiseConvolutionLayer::configureBackward(IPortableTensor *back_prop_input,

if (activation != ir::Activation::NONE)
{
/*
_act_back_prop_output = std::make_unique<BackPropTensor>(_back_prop_output->get_info());
_act_back_prop_output->setBuffer(
std::make_shared<basic::Allocator>(_act_back_prop_output->total_size()));
*/
}

const int64_t k_packet_size = [&]() {
@@ -74,20 +76,20 @@ void DepthwiseConvolutionLayer::configureBackward(IPortableTensor *back_prop_input,
}();

const auto incoming_shape = getShape(_back_prop_output);
const auto filter_shape = getShape(_kernel);
const int batch = incoming_shape.Dims(0);
// const auto filter_shape = getShape(_kernel);
// const int batch = incoming_shape.Dims(0);
const int out_depth = incoming_shape.Dims(3);
const int filter_rows = filter_shape.Dims(1);
const int filter_cols = filter_shape.Dims(2);
// const int filter_rows = filter_shape.Dims(1);
// const int filter_cols = filter_shape.Dims(2);

const int filter_spatial_size = filter_rows * filter_cols;
const int padded_filter_inner_dim_size =
((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size;
// const int filter_spatial_size = filter_rows * filter_cols;
// const int padded_filter_inner_dim_size =
// ((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size;

_use_padded_filter = (out_depth % k_packet_size) != 0;

// prepare padded_filter buffer for cker
auto padded_filter_info = ir::OperandInfo(_kernel->get_info());
/* auto padded_filter_info = ir::OperandInfo(_kernel->get_info());
padded_filter_info.shape({batch, filter_spatial_size, padded_filter_inner_dim_size});
_padded_filter = std::make_unique<Tensor>(padded_filter_info);
_padded_filter->setBuffer(std::make_shared<basic::Allocator>(_padded_filter->total_size()));
@@ -105,6 +107,55 @@ void DepthwiseConvolutionLayer::configureBackward(IPortableTensor *back_prop_input,
_filter_dim_buffers = std::make_unique<Tensor>(filter_dim_buffers_info);
_filter_dim_buffers->setBuffer(
std::make_shared<basic::Allocator>(_filter_dim_buffers->total_size()));
*/
}

ExtraTensorRequests DepthwiseConvolutionLayer::requestExtraTensors()
{
ExtraTensorRequests reqs;

reqs.push_back(ExtraTensorRequest::createRequestLike(_back_prop_output, &_act_back_prop_output));

const auto incoming_shape = getShape(_back_prop_output);
const auto batch = incoming_shape.Dims(0);
const auto depth = incoming_shape.Dims(3);

const auto filter_shape = getShape(_kernel);
const int filter_rows = filter_shape.Dims(1);
const int filter_cols = filter_shape.Dims(2);
const int filter_spatial_size = filter_rows * filter_cols;

const auto k_packet_size = _dconv_kernel->kPacketSize<float>();
const int padded_filter_inner_dim_size =
((depth + k_packet_size - 1) / k_packet_size) * k_packet_size;

const int thread_count = _dconv_kernel->getThreadCount();

// _padded_filter
{
auto type_info = _kernel->get_info().typeInfo();
ir::Shape shape({batch, filter_spatial_size, padded_filter_inner_dim_size});
auto info = ir::OperandInfo::createStaticInfo(shape, type_info);
reqs.emplace_back(info, ExtraTensorLifeTime::BACKWARD, &_padded_filter);
}

// _filter_buffers
{
auto type_info = _kernel->get_info().typeInfo();
ir::Shape shape({thread_count, filter_spatial_size, padded_filter_inner_dim_size});
auto info = ir::OperandInfo::createStaticInfo(shape, type_info);
reqs.emplace_back(info, ExtraTensorLifeTime::BACKWARD, &_filter_buffers);
}

// _filter_dim_buffers
{
auto type = _back_prop_input->get_info().typeInfo();
ir::Shape shape({thread_count, padded_filter_inner_dim_size});
auto info = ir::OperandInfo::createStaticInfo(shape, type);
reqs.emplace_back(info, ExtraTensorLifeTime::BACKWARD, &_filter_dim_buffers);
}

return reqs;
}

void DepthwiseConvolutionLayer::forward(bool) { cpu::ops::DepthwiseConvolutionLayer::run(); }
@@ -133,7 +184,7 @@ void DepthwiseConvolutionLayer::backwardFloat32()
try
{
backprop_act =
backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output.get());
backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output);
}
catch (const std::exception &e)
{
@@ -151,15 +202,15 @@ void DepthwiseConvolutionLayer::backwardFloat32()
// Calculate gradient for input
_dconv_kernel->backpropInput(
dconv_params, getShape(backprop_act), getBuffer<float>(backprop_act), getShape(_kernel),
getBuffer<float>(_kernel), getBuffer<float>(_padded_filter.get()), getShape(_back_prop_input),
getBuffer<float>(_back_prop_input), _use_padded_filter, getBuffer<float>(_filter_buffers.get()),
getBuffer<float>(_filter_dim_buffers.get()));
getBuffer<float>(_kernel), getBuffer<float>(_padded_filter), getShape(_back_prop_input),
getBuffer<float>(_back_prop_input), _use_padded_filter, getBuffer<float>(_filter_buffers),
getBuffer<float>(_filter_dim_buffers));

// Calculate gradient for weights
_dconv_kernel->backpropFilter(
dconv_params, getShape(backprop_act), getBuffer<float>(backprop_act), getShape(_input),
getBuffer<float>(_input), getShape(_grad_weights), getBuffer<float>(_grad_weights),
getBuffer<float>(_padded_filter.get()), getBuffer<float>(_filter_buffers.get()));
getBuffer<float>(_padded_filter), getBuffer<float>(_filter_buffers));

// Calculate gradient for bias
if (_bias)
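For reference, the padded-filter sizing in the hunks above rounds the output depth up to the nearest multiple of the SIMD packet size reported by the cker depthwise-conv kernel. Below is a minimal standalone sketch of that arithmetic; the function name and the concrete values are illustrative only and not part of the runtime.

#include <cstdio>

// Round `depth` up to the next multiple of `k_packet_size`, mirroring the
// padded_filter_inner_dim_size computation used when sizing the padded
// filter buffer for the depthwise-conv backward kernels.
int padded_inner_dim(int depth, int k_packet_size)
{
  return ((depth + k_packet_size - 1) / k_packet_size) * k_packet_size;
}

int main()
{
  // 10 channels with a packet size of 4 pad out to 12, so a padded filter
  // is required (_use_padded_filter would be true); 8 channels stay at 8.
  std::printf("%d\n", padded_inner_dim(10, 4)); // prints 12
  std::printf("%d\n", padded_inner_dim(8, 4));  // prints 8
  return 0;
}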
11 changes: 6 additions & 5 deletions runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.h
@@ -43,6 +43,8 @@ class DepthwiseConvolutionLayer : public ::onert::exec::train::ITrainableFunction
void configureBackward(IPortableTensor *back_prop_input, IPortableTensor *grad_weights,
IPortableTensor *grad_bias, const IPortableTensor *back_prop_output,
const ir::Activation activation);

ExtraTensorRequests requestExtraTensors() override;
void forward(bool training) override;
void backward() override;

@@ -55,12 +57,11 @@ class DepthwiseConvolutionLayer : public ::onert::exec::train::ITrainableFunction
IPortableTensor *_back_prop_input;
const IPortableTensor *_back_prop_output;

// TODO Consider if these tensors should be built in TensorBuilder
std::unique_ptr<BackPropTensor> _act_back_prop_output;
ExtraTensor *_act_back_prop_output;
bool _use_padded_filter;
std::unique_ptr<Tensor> _padded_filter;
std::unique_ptr<Tensor> _filter_buffers;
std::unique_ptr<Tensor> _filter_dim_buffers;
ExtraTensor *_padded_filter;
ExtraTensor *_filter_buffers;
ExtraTensor *_filter_dim_buffers;

std::unique_ptr<nnfw::cker::train::DepthwiseConv> _dconv_kernel;
};
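The header now holds raw ExtraTensor pointers that are filled in later, which only works because each request carries the address of the member to populate. The sketch below is a rough reconstruction of what such a request type could look like, inferred purely from the call sites in this commit (ExtraTensorRequest(info, lifetime, &member) and ExtraTensorRequest::createRequestLike(origin, &member)); it uses simplified stand-in types and is not the actual declaration in the train backend.

#include <memory>
#include <utility>
#include <vector>

// Simplified stand-ins for the onert train-backend types used below; the real
// ir::OperandInfo, ExtraTensor, and ExtraTensorLifeTime are richer than this.
struct OperandInfo
{
  std::vector<int> shape;
};

struct ExtraTensor
{
  OperandInfo info;
};

enum class ExtraTensorLifeTime
{
  BACKWARD // the only lifetime value that appears in this commit
};

// Hypothetical shape of ExtraTensorRequest, reconstructed from its call sites.
struct ExtraTensorRequest
{
  ExtraTensorRequest(OperandInfo info, ExtraTensorLifeTime lt, ExtraTensor **addr)
    : info(std::move(info)), lifetime(lt), address(addr)
  {
  }

  // Convenience helper: request a tensor shaped like `origin`.
  static ExtraTensorRequest createRequestLike(const ExtraTensor *origin, ExtraTensor **addr)
  {
    return ExtraTensorRequest(origin->info, ExtraTensorLifeTime::BACKWARD, addr);
  }

  OperandInfo info;             // shape/type of the tensor to allocate
  ExtraTensorLifeTime lifetime; // BACKWARD: only needed during the backward pass
  ExtraTensor **address;        // where the planner publishes the allocated tensor
};

int main()
{
  ExtraTensor *padded_filter = nullptr; // plays the role of a member like _padded_filter

  std::vector<ExtraTensorRequest> reqs;
  reqs.emplace_back(OperandInfo{{1, 9, 64}}, ExtraTensorLifeTime::BACKWARD, &padded_filter);

  // A planner-like loop: allocate each requested tensor and publish it back
  // through the stored address, so the layer can use the raw pointer later
  // without owning the memory itself.
  std::vector<std::unique_ptr<ExtraTensor>> storage;
  for (auto &r : reqs)
  {
    storage.push_back(std::make_unique<ExtraTensor>(ExtraTensor{r.info}));
    *r.address = storage.back().get();
  }

  return padded_filter != nullptr ? 0 : 1;
}

This mirrors the visible shift in the headers from std::unique_ptr<Tensor> members to plain ExtraTensor pointers: the layer only describes what it needs and where to publish it, while ownership of the extra tensors presumably moves to whatever component plans and allocates them.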
51 changes: 31 additions & 20 deletions runtime/onert/backend/train/ops/FullyConnectedLayer.cc
@@ -28,17 +28,24 @@ namespace

using namespace onert;

std::unique_ptr<backend::train::Tensor>
createTransposedTensor(const backend::IPortableTensor *origin_tensor)
ir::OperandInfo transposeOperandInfo(const ir::OperandInfo &origin_info)
{
const auto &origin_shape = origin_tensor->getShape();
const auto &origin_shape = origin_info.shape();
assert(origin_shape.rank() == 2);

auto transposed_info = origin_tensor->get_info();
auto transposed_info = ir::OperandInfo(origin_info);
auto transposed_shape = ir::Shape{origin_shape.dim(1), origin_shape.dim(0)};
transposed_info.shape(transposed_shape);

return std::make_unique<backend::train::Tensor>(transposed_info);
return transposed_info;
}

backend::train::ExtraTensorRequest
createTransposeTensorRequest(const backend::IPortableTensor *origin,
backend::train::ExtraTensor **const addr)
{
return backend::train::ExtraTensorRequest(transposeOperandInfo(origin->get_info()),
backend::train::ExtraTensorLifeTime::BACKWARD, addr);
}

} // namespace
@@ -85,21 +92,25 @@ void FullyConnectedLayer::configureBackward(
throw std::runtime_error{
"train FullyConnectedLayer: Input other ranks than 2 are not supported."};

_transposed_weights = createTransposedTensor(weights);
_transposed_weights->setBuffer(std::make_shared<basic::Allocator>(weights->total_size()));
if (activation != ir::Activation::NONE)
{
}
}

_transposed_input = createTransposedTensor(input);
_transposed_input->setBuffer(std::make_shared<basic::Allocator>(input->total_size()));
ExtraTensorRequests FullyConnectedLayer::requestExtraTensors()
{
ExtraTensorRequests reqs;

_transposed_back_prop_output = createTransposedTensor(back_prop_output);
_transposed_back_prop_output->setBuffer(
std::make_shared<basic::Allocator>(back_prop_output->total_size()));
reqs.push_back(createTransposeTensorRequest(_weights, &_transposed_weights));

if (activation != ir::Activation::NONE)
reqs.push_back(createTransposeTensorRequest(_input, &_transposed_input));

reqs.push_back(createTransposeTensorRequest(_back_prop_output, &_transposed_back_prop_output));

if (_activation != ir::Activation::NONE)
{
_act_back_prop_output = std::make_unique<Tensor>(_back_prop_output->get_info());
_act_back_prop_output->setBuffer(
std::make_shared<basic::Allocator>(_back_prop_output->total_size()));
reqs.push_back(
ExtraTensorRequest::createRequestLike(_back_prop_output, &_act_back_prop_output));
}
  return reqs;
}

@@ -130,7 +141,7 @@ void FullyConnectedLayer::backwardFloat32()
try
{
backprop_act =
backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output.get());
backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output);
}
catch (const std::exception &e)
{
@@ -157,7 +168,7 @@ void FullyConnectedLayer::backwardFloat32()

// Transpose and compute gradient for input
// ∂L/∂X = fc(Incoming gradient, transposed W)
auto transposed_weights = _transposed_weights.get();
auto transposed_weights = _transposed_weights;
assert(transposed_weights->getShape().rank() == 2);
nnfw::cker::Transpose(transpose_param, getShape(_weights), getBuffer<float>(_weights),
getShape(transposed_weights), getBuffer<float>(transposed_weights));
@@ -169,12 +180,12 @@ void FullyConnectedLayer::backwardFloat32()

// Transpose and compute gradient for weights
// ∂L/∂W = fc(transposed incoming gradient, transposed X)
auto transposed_input = _transposed_input.get();
auto transposed_input = _transposed_input;
assert(transposed_input->getShape().rank() == 2);
nnfw::cker::Transpose(transpose_param, getShape(_input), getBuffer<float>(_input),
getShape(transposed_input), getBuffer<float>(transposed_input));

auto transposed_back_prop_output = _transposed_back_prop_output.get();
auto transposed_back_prop_output = _transposed_back_prop_output;
assert(transposed_back_prop_output->getShape().rank() == 2);
nnfw::cker::Transpose(transpose_param, getShape(backprop_act), getBuffer<float>(backprop_act),
getShape(transposed_back_prop_output),
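As a side note, the transposed extra tensors requested above exist only so the backward pass can hand cker's FullyConnected kernel the transposed weights, transposed input, and transposed incoming gradient. The following is a small self-contained sketch of the shape bookkeeping done by transposeOperandInfo, written against plain C++ types rather than onert's; the sizes are illustrative only.

#include <array>
#include <cassert>
#include <cstdio>

// Swap the two dimensions of a rank-2 shape, mirroring what
// transposeOperandInfo() does to the OperandInfo of a rank-2 tensor.
std::array<int, 2> transposedShape(const std::array<int, 2> &shape)
{
  return {shape[1], shape[0]};
}

int main()
{
  // For ∂L/∂X = fc(incoming gradient, transposed W), a [units, input_size]
  // weight tensor needs an [input_size, units] scratch buffer to hold W^T.
  std::array<int, 2> weights{128, 784}; // illustrative sizes only
  auto transposed = transposedShape(weights);
  assert(transposed[0] == 784 && transposed[1] == 128);
  std::printf("{%d, %d}\n", transposed[0], transposed[1]);
  return 0;
}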
10 changes: 5 additions & 5 deletions runtime/onert/backend/train/ops/FullyConnectedLayer.h
@@ -46,6 +46,7 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction,
const IPortableTensor *back_prop_output, ir::Activation activation,
ir::FullyConnectedWeightsFormat weights_format);

ExtraTensorRequests requestExtraTensors() override;
void forward(bool training) override;
void backward() override;

@@ -58,11 +59,10 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction,
IPortableTensor *_back_prop_input;
const IPortableTensor *_back_prop_output;

// TODO Optimize memory
std::unique_ptr<Tensor> _transposed_weights;
std::unique_ptr<Tensor> _transposed_input;
std::unique_ptr<Tensor> _transposed_back_prop_output;
std::unique_ptr<Tensor> _act_back_prop_output;
ExtraTensor *_transposed_weights;
ExtraTensor *_transposed_input;
ExtraTensor *_transposed_back_prop_output;
ExtraTensor *_act_back_prop_output;
};

} // namespace ops
