Adds ReduceMax, ReduceMean, ReduceMin, ReduceProd operations
nkogteva committed Aug 19, 2023
1 parent 2a89a44 commit 0ffbc27
Showing 15 changed files with 588 additions and 74 deletions.
33 changes: 33 additions & 0 deletions modules/nvidia_plugin/src/cuda/dnn.hpp
@@ -241,6 +241,39 @@ class DnnReduceAddDescriptor : public DnnReduceTensorDescriptor {
    }
};

class DnnReduceMulDescriptor : public DnnReduceTensorDescriptor {
public:
    explicit DnnReduceMulDescriptor(cudnnDataType_t compType) {
        set(CUDNN_REDUCE_TENSOR_MUL,
            compType,
            CUDNN_PROPAGATE_NAN,
            CUDNN_REDUCE_TENSOR_NO_INDICES,
            CUDNN_32BIT_INDICES);
    }
};

class DnnReduceMinDescriptor : public DnnReduceTensorDescriptor {
public:
    explicit DnnReduceMinDescriptor(cudnnDataType_t compType) {
        set(CUDNN_REDUCE_TENSOR_MIN,
            compType,
            CUDNN_PROPAGATE_NAN,
            CUDNN_REDUCE_TENSOR_NO_INDICES,
            CUDNN_32BIT_INDICES);
    }
};

class DnnReduceMaxDescriptor : public DnnReduceTensorDescriptor {
public:
    explicit DnnReduceMaxDescriptor(cudnnDataType_t compType) {
        set(CUDNN_REDUCE_TENSOR_MAX,
            compType,
            CUDNN_PROPAGATE_NAN,
            CUDNN_REDUCE_TENSOR_NO_INDICES,
            CUDNN_32BIT_INDICES);
    }
};

class DnnReduceAvgDescriptor : public DnnReduceTensorDescriptor {
public:
    explicit DnnReduceAvgDescriptor(cudnnDataType_t compType) {
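
Each of these descriptor wrappers differs only in the cudnnReduceTensorOp_t it passes to the inherited set() helper. A minimal sketch of what set() presumably forwards to, assuming the DnnReduceTensorDescriptor base owns a raw cudnnReduceTensorDescriptor_t (the member name desc_ and the error handling are illustrative, not the plugin's actual code):

    // Illustrative sketch of what set() presumably forwards to; desc_ stands
    // in for the cudnnReduceTensorDescriptor_t the base class is assumed to own.
    cudnnStatus_t status = cudnnSetReduceTensorDescriptor(desc_,
                                                          op,           // e.g. CUDNN_REDUCE_TENSOR_MUL
                                                          compType,     // accumulation data type
                                                          nanOpt,       // CUDNN_PROPAGATE_NAN here
                                                          indices,      // CUDNN_REDUCE_TENSOR_NO_INDICES
                                                          indicesType); // CUDNN_32BIT_INDICES
    // A real wrapper would translate any non-CUDNN_STATUS_SUCCESS status into an exception.

Note that even the Min and Max descriptors request CUDNN_REDUCE_TENSOR_NO_INDICES: OpenVINO's ReduceMin/ReduceMax return reduced values, not argmin/argmax positions, so the indices output is never needed.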
60 changes: 60 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce.cpp
@@ -0,0 +1,60 @@
// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "cuda/descriptor_utils.hpp"
#include "converters.hpp"
#include "reduce_sum.hpp"

namespace ov {
namespace nvidia_gpu {

cudnnDataType_t ReduceOp::reduceCompType(const ov::Node& node) {
    const auto in_type = convertDataType<cudnnDataType_t>(node.get_input_element_type(0));
    const auto out_type = convertDataType<cudnnDataType_t>(node.get_output_element_type(0));
    OPENVINO_ASSERT(in_type == out_type, "Node name: ", node.get_friendly_name());
    switch (in_type) {
        case CUDNN_DATA_FLOAT:
        case CUDNN_DATA_HALF:
            // TODO: it is unclear from the cuDNN documentation whether the compute type
            // can be half when both tensors are half; this needs testing
            return CUDNN_DATA_FLOAT;
        case CUDNN_DATA_DOUBLE:
            return CUDNN_DATA_DOUBLE;
        default:
            throw_ov_exception(
                fmt::format("ov::nvidia_gpu::reduceCompType(): Unsupported data types: in = {}, out = {}",
                            toString(in_type),
                            toString(out_type)));
    }
}

ReduceOp::ReduceOp(const CreationContext& context,
                   const ov::Node& node,
                   IndexCollection&& inputIds,
                   IndexCollection&& outputIds,
                   const CUDA::DnnReduceTensorDescriptor& reduce_desc)
    : OperationCuDnn{context, node, move(inputIds), move(outputIds)},
      comp_type_{reduceCompType(node)},
      a_desc_{CUDA::makeInputDnnTensorDescr(node, 0)},
      c_desc_{CUDA::makeOutputDnnTensorDescr(node, 0)},
      reduce_desc_(reduce_desc),
      workspace_size_{context.dnnHandle().getReductionWorkspaceSize(reduce_desc_, a_desc_, c_desc_)} {}

void ReduceOp::Execute(const InferenceRequestContext& context,
                       Inputs inputTensors,
                       Outputs outputTensors,
                       const Workbuffers& workbuffers) const {
    context.getThreadContext().dnnHandle().reduceTensor(reduce_desc_,
                                                        workbuffers.createMutableSpanFrom<0>(workspace_size_),
                                                        CUDA::DnnScaleFactorOne{comp_type_},
                                                        a_desc_,
                                                        inputTensors[0],
                                                        CUDA::DnnScaleFactorZero{comp_type_},
                                                        c_desc_,
                                                        outputTensors[0]);
}

bool ReduceOp::IsCudaGraphCompatible() const { return true; }

} // namespace nvidia_gpu
} // namespace ov
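
Execute() maps straight onto cuDNN's generic reduction entry point: DnnScaleFactorOne and DnnScaleFactorZero supply the alpha and beta scaling factors of cudnnReduceTensor, whose semantics are C = alpha * reduce(A) + beta * C, so alpha = 1 and beta = 0 yield a plain reduction into the output tensor. A hedged sketch of the underlying call the reduceTensor() wrapper presumably makes (handle and pointer names are illustrative, and alpha/beta would be double rather than float when the compute type is CUDNN_DATA_DOUBLE):

    // Illustrative only; with CUDNN_REDUCE_TENSOR_NO_INDICES the indices
    // buffer is unused, so null/zero is passed for it.
    const float alpha = 1.0f;
    const float beta = 0.0f;
    cudnnReduceTensor(handle,                    // cudnnHandle_t
                      reduceDesc,                // configured reduce descriptor
                      nullptr, 0,                // no indices requested
                      workspace, workspaceBytes, // scratch sized via cudnnGetReductionWorkspaceSize
                      &alpha, aDesc, devA,       // input tensor A (device memory)
                      &beta, cDesc, devC);       // output tensor C (device memory)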
42 changes: 42 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce.hpp
@@ -0,0 +1,42 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cuda_operation_base.hpp>

namespace ov {
namespace nvidia_gpu {

class ReduceOp : public OperationCuDnn {
public:
    ReduceOp(const CreationContext& context,
             const ov::Node& node,
             IndexCollection&& inputIds,
             IndexCollection&& outputIds,
             const CUDA::DnnReduceTensorDescriptor& reduce_desc);

    void Execute(const InferenceRequestContext& context,
                 Inputs inputTensors,
                 Outputs outputTensors,
                 const Workbuffers& workbuffers) const override;

    bool IsCudaGraphCompatible() const override;
    WorkbufferRequest GetWorkBufferRequest() const override;

    static cudnnDataType_t reduceCompType(const ov::Node& node);

private:
    cudnnDataType_t comp_type_;
    CUDA::DnnReduceTensorDescriptor reduce_desc_;
    CUDA::DnnTensorDescriptor a_desc_;
    CUDA::DnnTensorDescriptor c_desc_;
    size_t workspace_size_;
};

inline WorkbufferRequest ReduceOp::GetWorkBufferRequest() const {
    return {{}, {workspace_size_}};  // TODO: find a way to allocate buffers from constructor
}

} // namespace nvidia_gpu
} // namespace ov
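
The workspace size is computed once in the constructor, but the buffer itself is provided by the runtime at execution time: GetWorkBufferRequest() declares what the op needs, and Execute() retrieves it. A minimal sketch of that handshake, assuming WorkbufferRequest is a pair of size lists for immutable and mutable buffers (the field names below are illustrative):

    // Illustrative reading of the request ReduceOp returns:
    //   immutable buffers: none (no constant data to upload)
    //   mutable buffers:   one scratch area of workspace_size_ bytes
    // At Execute() time the runtime hands that scratch back, and
    // createMutableSpanFrom<0>(workspace_size_) views it as buffer #0.
    WorkbufferRequest request{/*immutable_sizes=*/{}, /*mutable_sizes=*/{workspace_size_}};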
20 changes: 20 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_max.cpp
@@ -0,0 +1,20 @@
// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "cuda_operation_registry.hpp"
#include "reduce_max.hpp"

namespace ov {
namespace nvidia_gpu {

ReduceMaxOp::ReduceMaxOp(const CreationContext& context,
                         const ov::Node& node,
                         IndexCollection&& inputIds,
                         IndexCollection&& outputIds)
    : ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceMaxDescriptor(reduceCompType(node))) {}

OPERATION_REGISTER(ReduceMaxOp, ReduceMax);

} // namespace nvidia_gpu
} // namespace ov
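
OPERATION_REGISTER(ReduceMaxOp, ReduceMax) binds the wrapper class to the OpenVINO op type name; the macro itself is defined in cuda_operation_registry.hpp, which this commit does not show. A hypothetical expansion, purely to illustrate the factory registration it implies (the registry type and add() API below are assumptions, not the plugin's real interface):

    // Hypothetical sketch: NOT the plugin's actual macro expansion.
    static const bool reduce_max_registered = OperationRegistrySketch::add(
        "ReduceMax",  // OpenVINO op type name the plugin matches against
        [](const CreationContext& ctx, const ov::Node& node,
           IndexCollection&& in, IndexCollection&& out) {
            return std::make_unique<ReduceMaxOp>(ctx, node, std::move(in), std::move(out));
        });

The same one-line registration appears in each of the reduce_mean, reduce_min, and reduce_prod translation units below.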
21 changes: 21 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_max.hpp
@@ -0,0 +1,21 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "reduce.hpp"

namespace ov {
namespace nvidia_gpu {

class ReduceMaxOp : public ReduceOp {
public:
    explicit ReduceMaxOp(const CreationContext& context,
                         const ov::Node& node,
                         IndexCollection&& inputIds,
                         IndexCollection&& outputIds);
};

} // namespace nvidia_gpu
} // namespace ov
20 changes: 20 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_mean.cpp
@@ -0,0 +1,20 @@
// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "cuda_operation_registry.hpp"
#include "reduce_mean.hpp"

namespace ov {
namespace nvidia_gpu {

ReduceMeanOp::ReduceMeanOp(const CreationContext& context,
                           const ov::Node& node,
                           IndexCollection&& inputIds,
                           IndexCollection&& outputIds)
    : ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceAvgDescriptor(reduceCompType(node))) {}

OPERATION_REGISTER(ReduceMeanOp, ReduceMean);

} // namespace nvidia_gpu
} // namespace ov
21 changes: 21 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_mean.hpp
@@ -0,0 +1,21 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "reduce.hpp"

namespace ov {
namespace nvidia_gpu {

class ReduceMeanOp : public ReduceOp {
public:
    explicit ReduceMeanOp(const CreationContext& context,
                          const ov::Node& node,
                          IndexCollection&& inputIds,
                          IndexCollection&& outputIds);
};

} // namespace nvidia_gpu
} // namespace ov
20 changes: 20 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_min.cpp
@@ -0,0 +1,20 @@
// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "cuda_operation_registry.hpp"
#include "reduce_min.hpp"

namespace ov {
namespace nvidia_gpu {

ReduceMinOp::ReduceMinOp(const CreationContext& context,
                         const ov::Node& node,
                         IndexCollection&& inputIds,
                         IndexCollection&& outputIds)
    : ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceMinDescriptor(reduceCompType(node))) {}

OPERATION_REGISTER(ReduceMinOp, ReduceMin);

} // namespace nvidia_gpu
} // namespace ov
21 changes: 21 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_min.hpp
@@ -0,0 +1,21 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "reduce.hpp"

namespace ov {
namespace nvidia_gpu {

class ReduceMinOp : public ReduceOp {
public:
    explicit ReduceMinOp(const CreationContext& context,
                         const ov::Node& node,
                         IndexCollection&& inputIds,
                         IndexCollection&& outputIds);
};

} // namespace nvidia_gpu
} // namespace ov
20 changes: 20 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_prod.cpp
@@ -0,0 +1,20 @@
// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "cuda_operation_registry.hpp"
#include "reduce_prod.hpp"

namespace ov {
namespace nvidia_gpu {

ReduceProdOp::ReduceProdOp(const CreationContext& context,
                           const ov::Node& node,
                           IndexCollection&& inputIds,
                           IndexCollection&& outputIds)
    : ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceMulDescriptor(reduceCompType(node))) {}

OPERATION_REGISTER(ReduceProdOp, ReduceProd);

} // namespace nvidia_gpu
} // namespace ov
21 changes: 21 additions & 0 deletions modules/nvidia_plugin/src/ops/reduce_prod.hpp
@@ -0,0 +1,21 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "reduce.hpp"

namespace ov {
namespace nvidia_gpu {

class ReduceProdOp : public ReduceOp {
public:
    explicit ReduceProdOp(const CreationContext& context,
                          const ov::Node& node,
                          IndexCollection&& inputIds,
                          IndexCollection&& outputIds);
};

} // namespace nvidia_gpu
} // namespace ov
51 changes: 3 additions & 48 deletions modules/nvidia_plugin/src/ops/reduce_sum.cpp
@@ -2,64 +2,19 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include "cuda_operation_registry.hpp"
 #include "reduce_sum.hpp"
 
-#include <cuda/descriptor_utils.hpp>
-#include <cuda_operation_registry.hpp>
-
-#include "converters.hpp"
-
 namespace ov {
 namespace nvidia_gpu {
 
-cudnnDataType_t reduceCompType(const ov::Node& node) {
-    const auto in_type = convertDataType<cudnnDataType_t>(node.get_input_element_type(0));
-    const auto out_type = convertDataType<cudnnDataType_t>(node.get_output_element_type(0));
-    // if (node.get_input_element_type(0) == ov::element::Type_t::f64) return CUDNN_DATA_DOUBLE;
-    switch (switchCase(in_type, out_type)) {
-        case switchCase(CUDNN_DATA_FLOAT, CUDNN_DATA_FLOAT):
-        case switchCase(CUDNN_DATA_FLOAT, CUDNN_DATA_HALF):
-        case switchCase(CUDNN_DATA_FLOAT, CUDNN_DATA_INT8):
-        case switchCase(CUDNN_DATA_HALF, CUDNN_DATA_FLOAT):
-        case switchCase(CUDNN_DATA_INT8, CUDNN_DATA_FLOAT):
-            // TODO: it's unclear from documentation, whether it can be half when both tensors are
-            // half, or int8 when both tensors are int8. we'll have to test it
-            return CUDNN_DATA_FLOAT;
-        case switchCase(CUDNN_DATA_DOUBLE, CUDNN_DATA_DOUBLE):
-            return CUDNN_DATA_DOUBLE;
-        default:
-            throw_ov_exception(fmt::format("ov::nvidia_gpu::reduceCompType(): Unsupported data types: in0 = {}, in1 = {}",
-                                           toString(in_type),
-                                           toString(out_type)));
-    }
-}
-
 ReduceSumOp::ReduceSumOp(const CreationContext& context,
                          const ov::Node& node,
                          IndexCollection&& inputIds,
                          IndexCollection&& outputIds)
-    : OperationCuDnn{context, node, move(inputIds), move(outputIds)},
-      comp_type_{reduceCompType(node)},
-      a_desc_{CUDA::makeInputDnnTensorDescr(node, 0)},
-      c_desc_{CUDA::makeOutputDnnTensorDescr(node, 0)},
-      workspace_size_{context.dnnHandle().getReductionWorkspaceSize(add_desc_, a_desc_, c_desc_)} {}
-
-void ReduceSumOp::Execute(const InferenceRequestContext& context,
-                          Inputs inputTensors,
-                          Outputs outputTensors,
-                          const Workbuffers& workbuffers) const {
-    context.getThreadContext().dnnHandle().reduceTensor(add_desc_,
-                                                        workbuffers.createMutableSpanFrom<0>(workspace_size_),
-                                                        CUDA::DnnScaleFactorOne{comp_type_},
-                                                        a_desc_,
-                                                        inputTensors[0],
-                                                        CUDA::DnnScaleFactorZero{comp_type_},
-                                                        c_desc_,
-                                                        outputTensors[0]);
-}
-
-bool ReduceSumOp::IsCudaGraphCompatible() const { return true; }
+    : ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceAddDescriptor(reduceCompType(node))) {}
 
 OPERATION_REGISTER(ReduceSumOp, ReduceSum);
 
 } // namespace nvidia_gpu
 } // namespace ov
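
With reduceCompType(), the tensor descriptors, Execute(), and the workspace bookkeeping hoisted into the shared ReduceOp base, each concrete reduction shrinks to a constructor that picks a cuDNN descriptor. Supporting another cuDNN-backed reduction would follow the same recipe; for example, a hypothetical L1-norm descriptor (not part of this commit) could reuse the pattern:

    // Hypothetical: not part of this commit. cuDNN also exposes
    // CUDNN_REDUCE_TENSOR_NORM1 (sum of absolute values); wiring it up would
    // mirror the Mul/Min/Max descriptors added to dnn.hpp above.
    class DnnReduceNorm1Descriptor : public DnnReduceTensorDescriptor {
    public:
        explicit DnnReduceNorm1Descriptor(cudnnDataType_t compType) {
            set(CUDNN_REDUCE_TENSOR_NORM1,
                compType,
                CUDNN_PROPAGATE_NAN,
                CUDNN_REDUCE_TENSOR_NO_INDICES,
                CUDNN_32BIT_INDICES);
        }
    };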