From c12286fe5e90ac0e234c0f8b0846ed926ff03506 Mon Sep 17 00:00:00 2001
From: "Nickolay V. Shmyrev" <nshmyrev@gmail.com>
Date: Sat, 7 Oct 2023 15:24:57 +0300
Subject: [PATCH] Proper convolution mode for fast GPU processing (#350)

---
 sherpa-onnx/csrc/session.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sherpa-onnx/csrc/session.cc b/sherpa-onnx/csrc/session.cc
index fe747740f..e16fdaf70 100644
--- a/sherpa-onnx/csrc/session.cc
+++ b/sherpa-onnx/csrc/session.cc
@@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
   sess_opts.SetIntraOpNumThreads(num_threads);
   sess_opts.SetInterOpNumThreads(num_threads);
 
+  // Other options that may be useful for tuning or debugging:
+  // sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
+  // sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
+  // sess_opts.EnableProfiling("profile");
+
   switch (p) {
     case Provider::kCPU:
       break;  // nothing to do for the CPU provider
@@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
         // The CUDA provider is available, proceed with setting the options
         OrtCUDAProviderOptions options;
         options.device_id = 0;
+        // The default, OrtCudnnConvAlgoSearchExhaustive, is extremely slow.
+        options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
         // set more options on need
         sess_opts.AppendExecutionProvider_CUDA(options);
       } else {