Bilinear works

microsoft · Feb 14, 2024 · 75042c7 · 75042c7
1 parent 2cfe280
commit 75042c7
Show file tree

Hide file tree

Showing 2 changed files with 178 additions and 230 deletions.
diff --git a/onnxruntime/core/providers/cpu/tensor/upsample_antialias.h b/onnxruntime/core/providers/cpu/tensor/upsample_antialias.h
@@ -257,6 +257,8 @@ void ComputeInterpolationAtLevel1(int64_t num_channels, int64_t input_height, in
                                   concurrency::ThreadPool* tp) {
   const uint8_t* clip8_lookups = &p.GetClip8LookupTable()[640];
 
+  std::cout << "L1: ";
+
   concurrency::ThreadPool::TrySimpleParallelFor(
       tp, narrow<std::ptrdiff_t>(num_channels),
       [&](std::ptrdiff_t c) {
@@ -286,6 +288,8 @@ void ComputeInterpolationAtLevel1(int64_t num_channels, int64_t input_height, in
               output += (*Xdata_offset++) * (*weight_coeff++);
             }
 
+            std::cout << " " << output;
+
             if constexpr (is_8bit_v<InputType>) {
               *Ydata_offset++ = static_cast<InputType>(clip8_lookups[output >> 22]);
             } else if constexpr (std::is_same<InputType, int32_t>::value) {
@@ -296,6 +300,8 @@ void ComputeInterpolationAtLevel1(int64_t num_channels, int64_t input_height, in
           }
         }
       });
+
+  std::cout << std::endl;
 }
 
 /**
@@ -322,6 +328,8 @@ void ComputeInterpolationAtLevel2(int64_t num_channels, int64_t input_height, in
                                   const FilterParamsAntiAlias<AccumulateType>& p,
                                   const FilterParamsBaseAntiAlias<AccumulateType>& p_dim,
                                   concurrency::ThreadPool* tp) {
+  std::cout << "L2: ";
+
   const uint8_t* clip8_lookups = &p.GetClip8LookupTable()[640];
   // This condition is set for higher performance.
   // Observed that TrySimpleParallelFor in dim num_channels always has higher efficiency, so I would rather
@@ -357,6 +365,9 @@ void ComputeInterpolationAtLevel2(int64_t num_channels, int64_t input_height, in
                 output += *Xdata_offset * (*weight_coeff_start++);
                 Xdata_offset += output_width;
               }
+
+              std::cout << " " << output;
+
               if constexpr (is_8bit_v<InputType>) {
                 *Ydata_offset++ = static_cast<InputType>(clip8_lookups[output >> 22]);
               } else if constexpr (std::is_same<InputType, int32_t>::value) {
@@ -403,6 +414,9 @@ void ComputeInterpolationAtLevel2(int64_t num_channels, int64_t input_height, in
                 output += *Xdata_offset * (*weight_coeff_start++);
                 Xdata_offset += output_width;
               }
+
+              std::cout << " " << output;
+
               if constexpr (is_8bit_v<InputType>) {
                 *Ydata_offset++ = static_cast<InputType>(clip8_lookups[output >> 22]);
               } else if constexpr (std::is_same<InputType, int32_t>::value) {
@@ -414,6 +428,7 @@ void ComputeInterpolationAtLevel2(int64_t num_channels, int64_t input_height, in
           }
         });
   }
+  std::cout << std::endl;
 }
 
 template <typename InputType, typename AccumulateType>