Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NNC] enable fusion of linear with elementwise OP #5

Draft
wants to merge 15 commits into
base: chunyuan/conv_eltwise_more_fusion
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions aten/src/ATen/native/mkldnn/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,22 @@ struct ContextConv final {
attr_(attr) {}
};

// Run-time state for a prepacked mkldnn linear: the weight already packed
// into oneDNN's expected layout, the optional bias (kept as an ATen
// tensor), and the fused post-op attribute.
struct ContextLinear final {
  ideep::tensor weight_packed_;
  c10::optional<at::Tensor> at_bias_;
  ideep::attr_t attr_;

  ContextLinear() = delete;

  ContextLinear(
      ideep::tensor&& weight_packed,
      c10::optional<at::Tensor> at_bias,
      ideep::attr_t attr)
      : weight_packed_(std::move(weight_packed)),
        at_bias_(std::move(at_bias)),
        // `attr` is a by-value sink parameter: move it into the member
        // instead of copying it.
        attr_(std::move(attr)) {}
};

} // namespace mkldnn
} // namespace native
} // namespace at
Expand Down
191 changes: 191 additions & 0 deletions aten/src/ATen/native/mkldnn/LinearPrepack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
#include <functional>
#include <numeric>
#include <vector>

#include <ATen/native/mkldnn/Common.h>
#include <ATen/native/mkldnn/LinearPrepack.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/utils/Factory.h>
#include <ATen/native/utils/ParamUtils.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/passes/mkldnn_rewrite.h>

#if AT_MKLDNN_ENABLED()

namespace at {
namespace native {
namespace mkldnn {
namespace internal {
namespace linear {

using namespace torch::jit::mkldnn;

// Script/JIT-facing entry point: resolves the named post-op fusion `attr`
// into an ideep attribute and builds a prepacked-linear op context from
// the given weight/bias/input_size.
c10::intrusive_ptr<mkldnn::LinearOpContext> createLinearPrePackOpContext(
    Tensor weight,
    c10::optional<Tensor> bias,
    std::vector<int64_t> input_size,
    std::string attr,
    std::vector<c10::optional<at::Scalar>> scalars,
    c10::optional<std::string> algorithm) {
  // Unknown fusion names are a hard error.
  const auto& attr_map = fusion_attr_map();
  auto found = attr_map.find(attr);
  TORCH_CHECK(found != attr_map.end(), "Fusion behavior undefined.");
  const ideep::attr_t op_attr = found->second.attr_function(scalars, algorithm);
  return mkldnn::MkldnnLinearOpContext::create_context(
      std::move(weight), std::move(bias), std::move(input_size), op_attr);
}

// Packs `weight` into the layout oneDNN prefers for an inner product on
// inputs of `input_size`, and bundles it with `bias` and the post-op
// attribute into a ContextLinear.
ContextLinear create(
    const Tensor& weight,
    const c10::optional<Tensor>& bias,
    const IntArrayRef input_size,
    const ideep::attr_t& attr) {
  c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
  ideep::tensor w = itensor_view_from_dense(weight);
  auto dtype = w.get_data_type();

  // Row count after collapsing all leading dims of the input:
  // prod(input_size) / last-dim.
  int64_t b_size = std::accumulate(
                       input_size.begin(),
                       input_size.end(),
                       (int64_t)1,
                       std::multiplies<int64_t>()) /
      input_size[input_size.size() - 1];

  auto out_features = weight.size(0);
  auto in_features = weight.size(1);
  ideep::dims reshaped_input_size = {b_size, in_features};

  // Ask oneDNN which (possibly blocked) weight layout it prefers for this
  // problem size and dtype, then repack the dense weight into it.
  ideep::tensor::desc expected_weight_desc =
      ideep::inner_product_forward::expected_weights_desc(
          {out_features, in_features},
          reshaped_input_size,
          /* w_dtype */ dtype,
          /* x_dtype */ dtype);

  ideep::tensor packed_weight;
  packed_weight.init(expected_weight_desc);
  packed_weight.feed_from(w);

  return ContextLinear{
      std::move(packed_weight),
      bias.has_value() ? c10::make_optional(*bias) : c10::nullopt,
      // `attr` is a const reference; std::move on it would silently copy
      // anyway, so pass it directly.
      attr};
}

// Thin dispatcher over the ideep inner-product kernels: picks the biased
// or bias-free overload. Empty scale vectors leave scaling at its
// defaults.
void _mkldnn_linear_out(
    const ideep::tensor& x,
    ideep::tensor& y,
    const ideep::tensor& w,
    const c10::optional<ideep::tensor>& b,
    const ideep::attr_t& attr = ideep::attr_t()) {
  const ideep::scale_t no_scale;
  if (!b.has_value()) {
    ideep::inner_product_forward::compute(
        x, w, y, no_scale, no_scale, no_scale, attr);
    return;
  }
  ideep::inner_product_forward::compute(
      x, w, *b, y, no_scale, no_scale, no_scale, attr);
}

void mkldnn_linear_out(
const Tensor& input,
ideep::tensor& mkldnn_output,
const ideep::tensor& mkldnn_weight,
const c10::optional<Tensor>& bias_opt,
const ideep::attr_t& attr = ideep::attr_t()) {
c10::MaybeOwned<Tensor> bias_maybe_owned =
at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;

c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
const ideep::tensor mkldnn_input = itensor_view_from_dense(input);

c10::optional<ideep::tensor> mkldnn_bias{c10::nullopt};
if (bias.defined()) {
mkldnn_bias = itensor_from_tensor(bias);
}

_mkldnn_linear_out(
mkldnn_input, mkldnn_output, mkldnn_weight, mkldnn_bias, attr);
}

// Runs the prepacked linear on `input`, allocating and returning the
// output. Inputs with dim > 2 are flattened to 2-D for the matmul and
// the output is reshaped back to the original leading dims afterwards.
Tensor run(ContextLinear& context, const Tensor& input) {
  const ideep::tensor& mkldnn_weight = context.weight_packed_;

  auto input_size = input.sizes();

  const int64_t dim = input.dim();
  // Collapse leading dims so the op sees a [rows, in_features] matrix.
  auto input_reshaped =
      dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)});

  // Final output shape: input's leading dims + out_features.
  std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);
  output_size.push_back(mkldnn_weight.get_dim(0));
  auto output = at::empty(output_size, input.options());

  if (dim != 2) {
    // Temporarily view the output as 2-D to match the flattened input.
    // NOTE(review): `output` is freshly allocated contiguous, so this
    // reshape should alias the same storage — confirm, since the kernel
    // below writes through `mkldnn_output`.
    std::vector<int64_t> output_size_reshaped = {
        input_reshaped.size(0), mkldnn_weight.get_dim(0)};
    output = output.reshape(output_size_reshaped);
  }

  c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
  ideep::tensor mkldnn_output = itensor_from_tensor(output);

  mkldnn_linear_out(
      input_reshaped,
      mkldnn_output,
      mkldnn_weight,
      context.at_bias_,
      context.attr_);

  if (dim != 2) {
    // Restore the original leading dims.
    output = output.reshape(output_size);
  }

  return output;
}

// Runs the prepacked linear on `input`, writing the result into the
// caller-provided raw buffer `output`, which must be sized for a 2-D
// [rows, out_features] result of the input's dtype.
void run(ContextLinear& context, const Tensor& input, void* output) {
  const ideep::tensor& mkldnn_weight = context.weight_packed_;

  const int64_t dim = input.dim();
  // Collapse leading dims so the op sees a [rows, in_features] matrix.
  auto input_reshaped =
      dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)});

  // (The N-d output_size computed by the allocating overload is not
  // needed here — the caller owns the buffer — so it was removed as dead
  // code.)
  std::vector<int64_t> output_size_reshaped = {
      input_reshaped.size(0), mkldnn_weight.get_dim(0)};

  // Wrap the raw output memory in an ideep tensor descriptor.
  ideep::tensor::desc o_desc = {
      output_size_reshaped, get_mkldnn_dtype(input.scalar_type())};
  ideep::tensor mkldnn_output = {o_desc, output};

  mkldnn_linear_out(
      input_reshaped,
      mkldnn_output,
      mkldnn_weight,
      context.at_bias_,
      context.attr_);
}

// Script-facing wrapper: forwards to the virtual run() of the stored
// op context.
Tensor linear_run(
    const Tensor& input,
    const c10::intrusive_ptr<mkldnn::LinearOpContext>& op_context) {
  return op_context->run(input);
}

} // namespace linear
} // namespace internal
} // namespace mkldnn
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
43 changes: 43 additions & 0 deletions aten/src/ATen/native/mkldnn/LinearPrepack.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

#include <ATen/Tensor.h>
#include <ATen/native/mkldnn/Common.h>
#include <ATen/native/mkldnn/OpContext.h>

#if AT_MKLDNN_ENABLED()

namespace at {
namespace native {
namespace mkldnn {
namespace internal {
namespace linear {

// Script/JIT-facing entry point: validates the requested post-op fusion
// `attr`, builds the oneDNN attribute, and returns a prepacked-linear op
// context holding the packed weight.
c10::intrusive_ptr<mkldnn::LinearOpContext> createLinearPrePackOpContext(
    Tensor weight,
    c10::optional<Tensor> bias,
    std::vector<int64_t> input_size,
    std::string attr,
    std::vector<c10::optional<at::Scalar>> scalars,
    c10::optional<std::string> algorithm);

// Runs the prepacked linear stored in `op_context` on `input`.
Tensor linear_run(
    const Tensor& input,
    const c10::intrusive_ptr<mkldnn::LinearOpContext>& op_context);

// Packs `weight` into the layout oneDNN expects for inputs of
// `input_size` and bundles it with `bias` and the fusion attribute.
ContextLinear create(
    const Tensor& weight,
    const c10::optional<Tensor>& bias,
    const IntArrayRef input_size,
    const ideep::attr_t& attr);

// Allocating overload: returns a new output tensor.
Tensor run(ContextLinear& context, const Tensor& input);

// Out-variant: writes into caller-provided raw memory `output`, sized
// for a 2-D [rows, out_features] result.
void run(ContextLinear& context, const Tensor& input, void* output);

} // namespace linear
} // namespace internal
} // namespace mkldnn
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
26 changes: 26 additions & 0 deletions aten/src/ATen/native/mkldnn/OpContext.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <ATen/native/mkldnn/ConvPrepack.h>
#include <ATen/native/mkldnn/LinearPrepack.h>
#include <ATen/native/mkldnn/OpContext.h>

#if AT_MKLDNN_ENABLED()
Expand Down Expand Up @@ -40,6 +41,31 @@ void MkldnnConvOpContext::run(const Tensor& input, void* output) {
return mkldnn::internal::convolution::run(op_context_, input, output);
}

// Packs the weight for the given input_size/attr and wraps everything in
// an intrusive context object.
c10::intrusive_ptr<LinearOpContext> MkldnnLinearOpContext::create_context(
    at::Tensor&& weight,
    c10::optional<at::Tensor>&& bias,
    std::vector<int64_t>&& input_size,
    const ideep::attr_t& attr) {
  // Pack first: create() reads its arguments by const reference, so the
  // originals are still valid to move from afterwards.
  auto packed =
      mkldnn::internal::linear::create(weight, bias, input_size, attr);
  return c10::make_intrusive<MkldnnLinearOpContext>(
      std::move(weight),
      std::move(bias),
      std::move(input_size),
      std::move(packed));
}

// Allocating overload: returns a new output tensor.
Tensor MkldnnLinearOpContext::run(const Tensor& input) {
  return mkldnn::internal::linear::run(op_context_, input);
}

// Out-variant: writes the result into caller-provided raw memory.
void MkldnnLinearOpContext::run(const Tensor& input, void* output) {
  return mkldnn::internal::linear::run(op_context_, input, output);
}

} // namespace mkldnn
} // namespace native
} // namespace at
Expand Down
55 changes: 55 additions & 0 deletions aten/src/ATen/native/mkldnn/OpContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,61 @@ class MkldnnConvOpContext final : public ConvOpContext {
const ideep::attr_t& attr);
};

// Serialized form of a prepacked linear op context, mirroring the
// createLinearPrePackOpContext arguments:
// (weight, bias, input_size, attr name, attr scalars, algorithm).
using SerializationTypeLinearPrePack = std::tuple<
    at::Tensor,
    c10::optional<at::Tensor>,
    std::vector<int64_t>,
    std::string,
    std::vector<c10::optional<at::Scalar>>,
    c10::optional<std::string>>;

// Abstract base for a prepacked-linear op context. Keeps the original
// (unpacked) construction arguments so the op can be re-serialized via
// unpack(), and exposes the two run entry points backends implement.
class LinearOpContext : public torch::jit::CustomClassHolder {
 protected:
  Tensor orig_weight_;
  c10::optional<Tensor> orig_bias_;
  std::vector<int64_t> input_size_;
  std::string attr_;
  std::vector<c10::optional<at::Scalar>> scalars_;
  c10::optional<std::string> algorithm_;

 public:
  // Returns the original construction arguments for serialization.
  SerializationTypeLinearPrePack unpack() {
    return std::make_tuple(
        orig_weight_, orig_bias_, input_size_, attr_, scalars_, algorithm_);
  }

  // Allocating overload: returns a new output tensor.
  virtual at::Tensor run(const at::Tensor& input) = 0;

  // Out-variant: writes the result into caller-provided raw memory.
  virtual void run(const Tensor& input, void* output) = 0;
};

// oneDNN-backed implementation: owns the packed weight (inside
// ContextLinear) alongside the original tensors stored on the base.
class MkldnnLinearOpContext final : public LinearOpContext {
 private:
  ContextLinear op_context_;

 public:
  // NOTE(review): the base fields attr_/scalars_/algorithm_ are never
  // assigned here, so unpack() will return default-constructed values for
  // them — confirm this is intended before serialization is relied upon.
  MkldnnLinearOpContext(
      Tensor&& weight,
      c10::optional<Tensor>&& bias,
      std::vector<int64_t>&& input_size,
      ContextLinear&& op_context)
      : op_context_(std::move(op_context)) {
    orig_weight_ = std::move(weight);
    orig_bias_ = std::move(bias);
    input_size_ = std::move(input_size);
  }

  // Allocating overload: returns a new output tensor.
  at::Tensor run(const at::Tensor& input) override;

  // Out-variant: writes the result into caller-provided raw memory.
  void run(const Tensor& input, void* output) override;

  // Packs `weight` for the given input_size/attr and wraps everything in
  // an intrusive context object.
  static c10::intrusive_ptr<LinearOpContext> create_context(
      at::Tensor&& weight,
      c10::optional<at::Tensor>&& bias,
      std::vector<int64_t>&& input_size,
      const ideep::attr_t& attr);
};

} // namespace mkldnn
} // namespace native
} // namespace at
Expand Down
Loading