From c12286fe5e90ac0e234c0f8b0846ed926ff03506 Mon Sep 17 00:00:00 2001
From: "Nickolay V. Shmyrev"
Date: Sat, 7 Oct 2023 15:24:57 +0300
Subject: [PATCH] Proper convolution mode for fast GPU processing (#350)

---
 sherpa-onnx/csrc/session.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sherpa-onnx/csrc/session.cc b/sherpa-onnx/csrc/session.cc
index fe747740f..e16fdaf70 100644
--- a/sherpa-onnx/csrc/session.cc
+++ b/sherpa-onnx/csrc/session.cc
@@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
   sess_opts.SetIntraOpNumThreads(num_threads);
   sess_opts.SetInterOpNumThreads(num_threads);
 
+  // Other possible options
+  // sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
+  // sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
+  // sess_opts.EnableProfiling("profile");
+
   switch (p) {
     case Provider::kCPU:
       break;  // nothing to do for the CPU provider
@@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
         // The CUDA provider is available, proceed with setting the options
         OrtCUDAProviderOptions options;
         options.device_id = 0;
+        // Default OrtCudnnConvAlgoSearchExhaustive is extremely slow
+        options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
         // set more options on need
         sess_opts.AppendExecutionProvider_CUDA(options);
       } else {