-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds ReduceMax, ReduceMean, ReduceMin, ReduceProd operations
- Loading branch information
Showing
15 changed files
with
588 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Copyright (C) 2021-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "cuda/descriptor_utils.hpp" | ||
#include "converters.hpp" | ||
#include "reduce_sum.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
/// Selects the cuDNN computation data type used when reducing @p node.
///
/// The element types of input 0 and output 0 are converted to cuDNN data types and are
/// required to be equal (checked with OPENVINO_ASSERT). Float and half tensors are reduced
/// using float computation; double tensors use double computation.
///
/// @param node Node whose input 0 / output 0 element types are inspected.
/// @return CUDNN_DATA_FLOAT for float or half tensors, CUDNN_DATA_DOUBLE for double tensors.
///         Every other element type is reported through throw_ov_exception().
cudnnDataType_t ReduceOp::reduceCompType(const ov::Node& node) {
    const auto in_type = convertDataType<cudnnDataType_t>(node.get_input_element_type(0));
    const auto out_type = convertDataType<cudnnDataType_t>(node.get_output_element_type(0));
    OPENVINO_ASSERT(in_type == out_type, "Node name: ", node.get_friendly_name());
    switch (in_type) {
        case CUDNN_DATA_FLOAT:
        case CUDNN_DATA_HALF:
            // TODO: it's unclear from documentation, whether it can be half when both tensors are
            // half, or int8 when both tensors are int8. we'll have to test it
            return CUDNN_DATA_FLOAT;
        case CUDNN_DATA_DOUBLE:
            return CUDNN_DATA_DOUBLE;
        default:
            // The second reported type is the output type of the node, not a second input,
            // so it is labelled accordingly.
            throw_ov_exception(fmt::format(
                "ov::nvidia_gpu::ReduceOp::reduceCompType(): Unsupported data types: input = {}, output = {}",
                toString(in_type),
                toString(out_type)));
    }
}
|
||
ReduceOp::ReduceOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds, | ||
const CUDA::DnnReduceTensorDescriptor& reduce_desc) | ||
: OperationCuDnn{context, node, move(inputIds), move(outputIds)}, | ||
comp_type_{reduceCompType(node)}, | ||
a_desc_{CUDA::makeInputDnnTensorDescr(node, 0)}, | ||
c_desc_{CUDA::makeOutputDnnTensorDescr(node, 0)}, | ||
reduce_desc_(reduce_desc), | ||
workspace_size_{context.dnnHandle().getReductionWorkspaceSize(reduce_desc_, a_desc_, c_desc_)} {} | ||
|
||
void ReduceOp::Execute(const InferenceRequestContext& context, | ||
Inputs inputTensors, | ||
Outputs outputTensors, | ||
const Workbuffers& workbuffers) const { | ||
context.getThreadContext().dnnHandle().reduceTensor(reduce_desc_, | ||
workbuffers.createMutableSpanFrom<0>(workspace_size_), | ||
CUDA::DnnScaleFactorOne{comp_type_}, | ||
a_desc_, | ||
inputTensors[0], | ||
CUDA::DnnScaleFactorZero{comp_type_}, | ||
c_desc_, | ||
outputTensors[0]); | ||
} | ||
|
||
/// Answers the IsCudaGraphCompatible() query; this operation always reports true.
bool ReduceOp::IsCudaGraphCompatible() const {
    return true;
}
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <cuda_operation_base.hpp> | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
class ReduceOp : public OperationCuDnn { | ||
public: | ||
ReduceOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds, | ||
const CUDA::DnnReduceTensorDescriptor& reduce_desc); | ||
|
||
void Execute(const InferenceRequestContext& context, | ||
Inputs inputTensors, | ||
Outputs outputTensors, | ||
const Workbuffers& workbuffers) const override; | ||
|
||
bool IsCudaGraphCompatible() const override; | ||
WorkbufferRequest GetWorkBufferRequest() const override; | ||
|
||
static cudnnDataType_t reduceCompType(const ov::Node& node); | ||
private: | ||
cudnnDataType_t comp_type_; | ||
CUDA::DnnReduceTensorDescriptor reduce_desc_; | ||
CUDA::DnnTensorDescriptor a_desc_; | ||
CUDA::DnnTensorDescriptor c_desc_; | ||
size_t workspace_size_; | ||
}; | ||
|
||
/// Describes the work buffers needed by the operation: an empty first list and a second list
/// with a single entry of workspace_size_ bytes.
/// NOTE(review): presumably the lists are the immutable and mutable buffer sizes, in that
/// order — confirm against the WorkbufferRequest definition.
inline WorkbufferRequest ReduceOp::GetWorkBufferRequest() const {
    // TODO: find a way to allocate buffers from constructor
    WorkbufferRequest request = {{}, {workspace_size_}};
    return request;
}
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Copyright (C) 2021-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "cuda_operation_registry.hpp" | ||
#include "reduce_max.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
ReduceMaxOp::ReduceMaxOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds) | ||
: ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceMaxDescriptor(reduceCompType(node))) {} | ||
|
||
OPERATION_REGISTER(ReduceMaxOp, ReduceMax); | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "reduce.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
class ReduceMaxOp : public ReduceOp { | ||
public: | ||
explicit ReduceMaxOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds); | ||
}; | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Copyright (C) 2021-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "cuda_operation_registry.hpp" | ||
#include "reduce_mean.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
ReduceMeanOp::ReduceMeanOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds) | ||
: ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceAvgDescriptor(reduceCompType(node))) {} | ||
|
||
OPERATION_REGISTER(ReduceMeanOp, ReduceMean); | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "reduce.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
class ReduceMeanOp : public ReduceOp { | ||
public: | ||
explicit ReduceMeanOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds); | ||
}; | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Copyright (C) 2021-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "cuda_operation_registry.hpp" | ||
#include "reduce_min.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
ReduceMinOp::ReduceMinOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds) | ||
: ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceMinDescriptor(reduceCompType(node))) {} | ||
|
||
OPERATION_REGISTER(ReduceMinOp, ReduceMin); | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "reduce.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
class ReduceMinOp : public ReduceOp { | ||
public: | ||
explicit ReduceMinOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds); | ||
}; | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Copyright (C) 2021-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "cuda_operation_registry.hpp" | ||
#include "reduce_prod.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
ReduceProdOp::ReduceProdOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds) | ||
: ReduceOp(context, node, move(inputIds), move(outputIds), CUDA::DnnReduceMulDescriptor(reduceCompType(node))) {} | ||
|
||
OPERATION_REGISTER(ReduceProdOp, ReduceProd); | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "reduce.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
class ReduceProdOp : public ReduceOp { | ||
public: | ||
explicit ReduceProdOp(const CreationContext& context, | ||
const ov::Node& node, | ||
IndexCollection&& inputIds, | ||
IndexCollection&& outputIds); | ||
}; | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.