From aac546d820f42cb2332100b5adff88d5c9fe24fd Mon Sep 17 00:00:00 2001
From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com>
Date: Mon, 11 Nov 2024 13:54:49 -0800
Subject: [PATCH] [ET-VK] Reduced int precision for texture coordinates in
 conv2d_pw op, to reduce shader register pressure.

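This diff reduces the integer precision of the texture coordinates used by the conv2d_pw op in ExecuTorch's Vulkan backend in order to reduce shader register pressure. The loop counter `z` over the depth dimension is now a uint16_t instead of an int, and the four kernel texel reads in each iteration use texelFetchOffset with constant offsets (0 through 3) from the base coordinate instead of computing `z + 0` .. `z + 3` coordinates for texelFetch. The `z4` counter stays an int and is now declared just before the loop, since it can no longer share the loop header's declaration with the uint16_t `z`.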

Differential Revision: [D64767415](https://our.internmc.facebook.com/intern/diff/D64767415/)

[ghstack-poisoned]
---
 backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl
index fedbdb0b5b..b1950f970e 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl
@@ -77,15 +77,16 @@ void main() {
     sum[i] = sum[0];
   }
 
+  int z4 = 0;
   // Since the kernel is 1x1, we only have to loop over the depth dimension.
-  for (int z = 0, z4 = 0; z < in_group_size; z += 4, ++z4) {
+  for (uint16_t z = uint16_t(0); z < uint16_t(in_group_size); z += uint16_t(4), ++z4) {
     // During prepacking, the weight tensor has been permuted so that the
     // channel (IC) dim is along the x-axis, and the batch (OC) dim is along
     // the z-axis.
-    const vec4 ktex_0 = texelFetch(t_kernel, u16vec2(z + 0, gpos.z), 0);
-    const vec4 ktex_1 = texelFetch(t_kernel, u16vec2(z + 1, gpos.z), 0);
-    const vec4 ktex_2 = texelFetch(t_kernel, u16vec2(z + 2, gpos.z), 0);
-    const vec4 ktex_3 = texelFetch(t_kernel, u16vec2(z + 3, gpos.z), 0);
+    const vec4 ktex_0 = texelFetchOffset(t_kernel, u16vec2(z, gpos.z), 0, u16vec2(0, 0));
+    const vec4 ktex_1 = texelFetchOffset(t_kernel, u16vec2(z, gpos.z), 0, u16vec2(1, 0));
+    const vec4 ktex_2 = texelFetchOffset(t_kernel, u16vec2(z, gpos.z), 0, u16vec2(2, 0));
+    const vec4 ktex_3 = texelFetchOffset(t_kernel, u16vec2(z, gpos.z), 0, u16vec2(3, 0));
 
 
 #pragma unroll