Refactoring, add test to skip list
nkogteva committed Sep 6, 2023
1 parent e7460e5 commit 3f8dcfe
Showing 5 changed files with 68 additions and 23 deletions.
@@ -82,21 +82,23 @@ CUDA::DnnConvolutionDescriptor ConvolutionParamsCuDnn::MakeConvolutionDescriptor
}

ConvolutionDescriptorsCuDnn::ConvolutionDescriptorsCuDnn(const CreationContext& context,
const ConvolutionParamsCuDnn& params)
const ConvolutionParamsCuDnn& params,
const std::vector<cudnnDataType_t> half_desc_types)
: params_{params},
tensor_element_type_{params_.ElementType()},
conv_desc_type_{params_.ElementType()},
input_{params_.MakeInputDescriptor()},
filter_{params_.MakeFilterDescriptor()},
output_{params_.MakeOutputDescriptor()},
conv_{},
algo_perf_{} {
algo_perf_{},
half_desc_types_{half_desc_types} {
auto& dnnHandle = context.dnnHandle();
if (context.opBenchOption()) {
BenchmarkOptimalAlgo(dnnHandle, params_);
} else {
GetAlgo(dnnHandle);
}
throwIfError(::cudnnSetConvolutionMathType(conv_.get(), algo_perf_.mathType));
}

void ConvolutionDescriptorsCuDnn::BenchmarkOptimalAlgo(const CUDA::DnnHandle& dnnHandle,
@@ -124,8 +126,12 @@ void ConvolutionDescriptorsCuDnn::BenchmarkOptimalAlgo(const CUDA::DnnHandle& dn
void ConvolutionDescriptorsCuDnn::GetAlgo(const CUDA::DnnHandle& dnnHandle) {
switch (tensor_element_type_) {
case CUDNN_DATA_HALF:
if (GetAlgoForConvDataType(dnnHandle, CUDNN_DATA_HALF)) return;
if (GetAlgoForConvDataType(dnnHandle, CUDNN_DATA_FLOAT)) return;
for (const auto& half_desc_type : half_desc_types_) {
if (GetAlgoForConvDataType(dnnHandle, half_desc_type)) {
conv_desc_type_ = half_desc_type;
return;
}
}
break;
default:
if (GetAlgoForConvDataType(dnnHandle, tensor_element_type_)) return;
@@ -153,6 +159,8 @@ bool ConvolutionDescriptorsCuDnn::GetAlgoForConvDataType(const CUDA::DnnHandle&
return false;
}

throwIfError(::cudnnSetConvolutionMathType(conv_.get(), algo_perf_.mathType));

size_t sizeInBytes = 0;
throwIfError(::cudnnGetConvolutionForwardWorkspaceSize(
dnnHandle.get(), input_.get(), filter_.get(), conv_.get(), output_.get(), algo_perf_.algo, &sizeInBytes));
@@ -164,8 +172,12 @@ bool ConvolutionDescriptorsCuDnn::GetAlgoForConvDataType(const CUDA::DnnHandle&
void ConvolutionDescriptorsCuDnn::FindAlgo(const CUDA::DnnHandle& dnnHandle) {
switch (tensor_element_type_) {
case CUDNN_DATA_HALF:
if (FindAlgoForConvDataType(dnnHandle, CUDNN_DATA_HALF)) return;
if (FindAlgoForConvDataType(dnnHandle, CUDNN_DATA_FLOAT)) return;
for (const auto& half_desc_type : half_desc_types_) {
if (FindAlgoForConvDataType(dnnHandle, half_desc_type)) {
conv_desc_type_ = half_desc_type;
return;
}
}
break;
default:
if (FindAlgoForConvDataType(dnnHandle, tensor_element_type_)) return;
@@ -193,6 +205,8 @@ bool ConvolutionDescriptorsCuDnn::FindAlgoForConvDataType(const CUDA::DnnHandle&
return false;
}

throwIfError(::cudnnSetConvolutionMathType(conv_.get(), algo_perf_.mathType));

size_t sizeInBytes = 0;
throwIfError(::cudnnGetConvolutionForwardWorkspaceSize(
dnnHandle.get(), input_.get(), filter_.get(), conv_.get(), output_.get(), algo_perf_.algo, &sizeInBytes));
@@ -208,8 +222,12 @@ void ConvolutionDescriptorsCuDnn::FindAlgo(const CUDA::DnnHandle& dnnHandle,
CUDA::DeviceBuffer<std::byte> workspace) {
switch (tensor_element_type_) {
case CUDNN_DATA_HALF:
if (FindAlgoForConvDataType(dnnHandle, inPtr, filterPtr, outPtr, workspace, CUDNN_DATA_HALF)) return;
if (FindAlgoForConvDataType(dnnHandle, inPtr, filterPtr, outPtr, workspace, CUDNN_DATA_FLOAT)) return;
for (const auto& half_desc_type : half_desc_types_) {
if (FindAlgoForConvDataType(dnnHandle, inPtr, filterPtr, outPtr, workspace, half_desc_type)) {
conv_desc_type_ = half_desc_type;
return;
}
}
break;
default:
if (FindAlgoForConvDataType(dnnHandle, inPtr, filterPtr, outPtr, workspace, tensor_element_type_)) return;
@@ -314,21 +332,24 @@ CUDA::DnnConvolutionDescriptor ConvolutionBackpropDataParamsCuDnn::MakeConvoluti
}

ConvolutionBackpropDataDescriptorCuDnn::ConvolutionBackpropDataDescriptorCuDnn(
const CreationContext& context, const ConvolutionBackpropDataParamsCuDnn& params)
const CreationContext& context,
const ConvolutionBackpropDataParamsCuDnn& params,
const std::vector<cudnnDataType_t> half_desc_types)
: params_{params},
tensor_element_type_{params_.ElementType()},
conv_desc_type_{params_.ElementType()},
filter_desc_{params_.MakeFilterDescriptor()},
doutput_desc_{params_.MakeDOutputDescriptor()},
dinput_desc_{params_.MakeDInputDescriptor()},
conv_{},
algo_perf_{} {
algo_perf_{},
half_desc_types_{half_desc_types} {
auto& dnnHandle = context.dnnHandle();
if (context.opBenchOption()) {
BenchmarkOptimalAlgo(dnnHandle);
} else {
GetAlgo(dnnHandle);
}
throwIfError(::cudnnSetConvolutionMathType(conv_.get(), algo_perf_.mathType));
}

void ConvolutionBackpropDataDescriptorCuDnn::BenchmarkOptimalAlgo(const CUDA::DnnHandle& dnnHandle) {
@@ -355,8 +376,12 @@ void ConvolutionBackpropDataDescriptorCuDnn::BenchmarkOptimalAlgo(const CUDA::Dn
void ConvolutionBackpropDataDescriptorCuDnn::GetAlgo(const CUDA::DnnHandle& dnnHandle) {
switch (tensor_element_type_) {
case CUDNN_DATA_HALF:
if (GetAlgoForConvDataType(dnnHandle, CUDNN_DATA_HALF)) return;
if (GetAlgoForConvDataType(dnnHandle, CUDNN_DATA_FLOAT)) return;
for (const auto& half_desc_type : half_desc_types_) {
if (GetAlgoForConvDataType(dnnHandle, half_desc_type)) {
conv_desc_type_ = half_desc_type;
return;
}
}
break;
default:
if (GetAlgoForConvDataType(dnnHandle, tensor_element_type_)) return;
@@ -384,6 +409,8 @@ bool ConvolutionBackpropDataDescriptorCuDnn::GetAlgoForConvDataType(const CUDA::
return false;
}

throwIfError(::cudnnSetConvolutionMathType(conv_.get(), algo_perf_.mathType));

size_t sizeInBytes = 0;
throwIfError(::cudnnGetConvolutionBackwardDataWorkspaceSize(dnnHandle.get(),
filter_desc_.get(),
@@ -400,8 +427,12 @@ bool ConvolutionBackpropDataDescriptorCuDnn::GetAlgoForConvDataType(const CUDA::
void ConvolutionBackpropDataDescriptorCuDnn::FindAlgo(const CUDA::DnnHandle& dnnHandle) {
switch (tensor_element_type_) {
case CUDNN_DATA_HALF:
if (FindAlgoForConvDataType(dnnHandle, CUDNN_DATA_HALF)) return;
if (FindAlgoForConvDataType(dnnHandle, CUDNN_DATA_FLOAT)) return;
for (const auto half_desc_type : half_desc_types_) {
if (FindAlgoForConvDataType(dnnHandle, half_desc_type)) {
conv_desc_type_ = half_desc_type;
return;
}
}
break;
default:
if (FindAlgoForConvDataType(dnnHandle, tensor_element_type_)) return;
@@ -429,6 +460,8 @@ bool ConvolutionBackpropDataDescriptorCuDnn::FindAlgoForConvDataType(const CUDA:
return false;
}

throwIfError(::cudnnSetConvolutionMathType(conv_.get(), algo_perf_.mathType));

size_t sizeInBytes = 0;
throwIfError(::cudnnGetConvolutionBackwardDataWorkspaceSize(dnnHandle.get(),
filter_desc_.get(),
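
The recurring change in this file is an ordered fallback over candidate convolution compute types: for HALF tensors each entry of half_desc_types_ is tried in turn, the first type for which an algorithm lookup succeeds is recorded in conv_desc_type_, and the math type is now set per successful lookup instead of once in the constructor. A minimal standalone sketch of that fallback, with hypothetical names (pickComputeType, tryAlgo) standing in for the real GetAlgoForConvDataType/FindAlgoForConvDataType members:

#include <functional>
#include <optional>
#include <vector>

enum class ComputeType { Half, Float };  // stands in for cudnnDataType_t

// Try each candidate compute type in order and return the first one for which
// the algorithm lookup succeeds; std::nullopt means every candidate failed and
// the caller takes its error path.
std::optional<ComputeType> pickComputeType(const std::vector<ComputeType>& candidates,
                                           const std::function<bool(ComputeType)>& tryAlgo) {
    for (ComputeType candidate : candidates) {
        if (tryAlgo(candidate)) {
            return candidate;  // first hit wins, like the early return in GetAlgo()
        }
    }
    return std::nullopt;
}

int main() {
    // HALF tensors first try a HALF compute descriptor, then fall back to FLOAT.
    const auto picked = pickComputeType({ComputeType::Half, ComputeType::Float},
                                        [](ComputeType t) { return t == ComputeType::Float; });
    return picked.has_value() ? 0 : 1;  // picked is ComputeType::Float here
}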
@@ -73,9 +73,11 @@ class ConvolutionBackpropDataParamsCuDnn {
class ConvolutionDescriptorsCuDnn {
public:
ConvolutionDescriptorsCuDnn(const CreationContext& context,
const Convolution::Details::ConvolutionParamsCuDnn& params);
const Convolution::Details::ConvolutionParamsCuDnn& params,
const std::vector<cudnnDataType_t> half_desc_types = {CUDNN_DATA_HALF, CUDNN_DATA_FLOAT});

cudnnDataType_t ElementType() const { return tensor_element_type_; }
cudnnDataType_t DescType() const { return conv_desc_type_; }
const CUDA::DnnTensorDescriptor& Input() const { return input_; }
const CUDA::DnnFilterDescriptor& Filter() const { return filter_; }
const CUDA::DnnTensorDescriptor& Output() const { return output_; }
@@ -103,11 +105,13 @@ class ConvolutionDescriptorsCuDnn {
private:
ConvolutionParamsCuDnn params_;
cudnnDataType_t tensor_element_type_;
cudnnDataType_t conv_desc_type_;
CUDA::DnnTensorDescriptor input_;
CUDA::DnnFilterDescriptor filter_;
CUDA::DnnTensorDescriptor output_;
CUDA::DnnConvolutionDescriptor conv_;
cudnnConvolutionFwdAlgoPerf_t algo_perf_;
std::vector<cudnnDataType_t> half_desc_types_;
};

/**
@@ -116,9 +120,11 @@ class ConvolutionDescriptorsCuDnn {
class ConvolutionBackpropDataDescriptorCuDnn {
public:
ConvolutionBackpropDataDescriptorCuDnn(const CreationContext& context,
const Convolution::Details::ConvolutionBackpropDataParamsCuDnn& params);
const Convolution::Details::ConvolutionBackpropDataParamsCuDnn& params,
const std::vector<cudnnDataType_t> half_desc_types = {CUDNN_DATA_HALF, CUDNN_DATA_FLOAT});

cudnnDataType_t ElementType() const { return tensor_element_type_; }
cudnnDataType_t DescType() const { return conv_desc_type_; }
const CUDA::DnnTensorDescriptor& dOutput() const { return doutput_desc_; }
const CUDA::DnnFilterDescriptor& Filter() const { return filter_desc_; }
const CUDA::DnnTensorDescriptor& dInput() const { return dinput_desc_; }
@@ -146,11 +152,13 @@ class ConvolutionBackpropDataDescriptorCuDnn {
private:
ConvolutionBackpropDataParamsCuDnn params_;
cudnnDataType_t tensor_element_type_;
cudnnDataType_t conv_desc_type_;
CUDA::DnnFilterDescriptor filter_desc_;
CUDA::DnnTensorDescriptor doutput_desc_;
CUDA::DnnTensorDescriptor dinput_desc_;
CUDA::DnnConvolutionDescriptor conv_;
cudnnConvolutionBwdDataAlgoPerf_t algo_perf_;
std::vector<cudnnDataType_t> half_desc_types_;
};

std::shared_ptr<CUDA::DnnTensorDescriptor> MakeFusedAddDescriptor(const ov::Shape& shape,
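
With the defaulted half_desc_types parameter above, existing call sites keep the old HALF-then-FLOAT fallback unchanged, and a caller can narrow or reorder the candidates explicitly. A hypothetical call site, assuming a CreationContext context and a ConvolutionParamsCuDnn params are already in scope:

// Default candidates {CUDNN_DATA_HALF, CUDNN_DATA_FLOAT} preserve the old behavior:
Convolution::Details::ConvolutionDescriptorsCuDnn descs{context, params};

// Skip HALF compute descriptors and try FLOAT only:
Convolution::Details::ConvolutionDescriptorsCuDnn float_only{
    context, params, std::vector<cudnnDataType_t>{CUDNN_DATA_FLOAT}};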
3 changes: 2 additions & 1 deletion modules/nvidia_plugin/src/ops/fused_convolution.cpp
@@ -59,7 +59,8 @@ OperationBase::Ptr fusedConvolutionFactory(const CreationContext& context,
}
#endif // ENABLE_CUDNN_BACKEND_API

const auto conv_descs{std::make_shared<Convolution::Details::ConvolutionDescriptorsCuDnn>(context, params.conv_)};
const auto conv_descs{std::make_shared<Convolution::Details::ConvolutionDescriptorsCuDnn>(context, params.conv_,
std::vector<cudnnDataType_t>{CUDNN_DATA_HALF, CUDNN_DATA_FLOAT})}; // 119703: investigate whether we need HALF here
const auto bias_desc{Convolution::Details::MakeFusedAddDescriptor(params.bias_shape_, params.conv_.element_type_)};
const auto activation_desc{Convolution::Details::MakeFusedActivationDescriptor(params.activation_)};
const auto add_desc{params.add_shape_ ? Convolution::Details::MakeFusedAddDescriptor(params.add_shape_.value(),
9 changes: 5 additions & 4 deletions modules/nvidia_plugin/src/ops/fused_convolution_cudnn.cpp
@@ -22,7 +22,8 @@ FusedConvolutionCuDnn::FusedConvolutionCuDnn(const CreationContext& context,
IndexCollection&& outputIds,
Convolution::Details::FusedConvolutionParams params)
: OperationCuDnn{context, node, std::move(inputIds), std::move(outputIds)},
conv_descs_{std::make_shared<Convolution::Details::ConvolutionDescriptorsCuDnn>(context, params.conv_)},
conv_descs_{std::make_shared<Convolution::Details::ConvolutionDescriptorsCuDnn>(context, params.conv_,
std::vector<cudnnDataType_t>{CUDNN_DATA_HALF, CUDNN_DATA_FLOAT})}, // 119703: investigate whether we need HALF here
bias_desc_{Convolution::Details::MakeFusedAddDescriptor(params.bias_shape_, params.conv_.element_type_)},
add_desc_{params.add_shape_ ? Convolution::Details::MakeFusedAddDescriptor(params.add_shape_.value(),
params.conv_.element_type_)
@@ -65,18 +66,18 @@ void FusedConvolutionCuDnn::Execute(const InferenceRequestContext& context,
cudnnTensorDescriptor_t zTensorDesc;
const void* zTensorIn = nullptr;
if (includesOnlyBiasAdd) {
alpha2 = &CUDA::NumericConst<CUDA::constants::zero>(conv_descs_->ElementType());
alpha2 = &CUDA::NumericConst<CUDA::constants::zero>(conv_descs_->DescType());
zTensorDesc = conv_descs_->Output().get();
zTensorIn = outputs[ArgIndices::output].get();
} else {
alpha2 = &CUDA::NumericConst<CUDA::constants::one>(conv_descs_->ElementType());
alpha2 = &CUDA::NumericConst<CUDA::constants::one>(conv_descs_->DescType());
zTensorDesc = add_desc_->get();
zTensorIn = inputs[ArgIndices::add].get();
}

throwIfError(
::cudnnConvolutionBiasActivationForward(dnnHandle.get(),
&CUDA::NumericConst<CUDA::constants::one>(conv_descs_->ElementType()),
&CUDA::NumericConst<CUDA::constants::one>(conv_descs_->DescType()),
conv_descs_->Input().get(),
inputs[ArgIndices::input].get(),
conv_descs_->Filter().get(),
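
The ElementType() to DescType() switch in Execute() matters because the two can now diverge: HALF tensors may end up with a FLOAT convolution descriptor, and this diff keys the alpha/beta scaling constants off the descriptor's compute type rather than the tensor element type. A standalone sketch of what a NumericConst-style helper then has to do (hypothetical names; 0x3C00 is 1.0 in IEEE-754 binary16):

#include <cstdint>

enum class ComputeType { Half, Float };  // stands in for cudnnDataType_t

struct Half {
    std::uint16_t bits;  // raw binary16 payload
};

// Return a pointer to the constant 1.0 encoded in the type matching the
// convolution descriptor's compute type.
const void* oneFor(ComputeType type) {
    static const Half kOneHalf{0x3C00};   // 1.0 in binary16
    static const float kOneFloat = 1.0f;  // 1.0 in binary32
    return type == ComputeType::Half ? static_cast<const void*>(&kOneHalf)
                                     : static_cast<const void*>(&kOneFloat);
}

int main() { return oneFor(ComputeType::Float) != nullptr ? 0 : 1; }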
2 changes: 2 additions & 0 deletions modules/nvidia_plugin/tests/functional/skip_tests_config.cpp
@@ -37,6 +37,8 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*InferRequestIOBBlobTest.*secondCallGetOutputAfterInferSync.*)",
// Old API cannot deallocate tensor
R"(.*InferRequestIOBBlobTest.*canProcessDeallocatedOutputBlobAfterGetAndSetBlob.*)",
// 119703
R"(.*smoke_GroupConvolutionBias(Add|AddAdd)_2D_ExplicitPaddingSymmetric2.*FP16*.*)",
};

#ifdef _WIN32
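
The new skip entry is a regex over full test names, applied the way disabledTestPatterns() entries are matched in the functional test suite. A minimal standalone check, using a made-up test name purely for illustration:

#include <iostream>
#include <regex>
#include <string>

int main() {
    // Pattern added to the skip list above (ticket 119703).
    const std::regex disabled{
        R"(.*smoke_GroupConvolutionBias(Add|AddAdd)_2D_ExplicitPaddingSymmetric2.*FP16*.*)"};
    // Hypothetical test name; real names come from the test suite.
    const std::string name =
        "smoke_GroupConvolutionBiasAdd_2D_ExplicitPaddingSymmetric2/"
        "GroupConvolutionBiasAddLayerTest.Inference/netPRC=FP16";
    std::cout << std::boolalpha << std::regex_match(name, disabled) << '\n';  // prints true
}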
