tracel-ai · nathanielsimard · Nov 25, 2024 · Nov 22, 2024 · Nov 22, 2024 · Nov 22, 2024
diff --git a/crates/cubecl-core/src/ir/operation.rs b/crates/cubecl-core/src/ir/operation.rs
@@ -1,8 +1,11 @@
 use std::fmt::Display;
 
-use crate::prelude::AtomicOp;
-
-use super::{Branch, CoopMma, Item, Plane, Select, Synchronization, Variable};
+use super::{Branch, CoopMma, Item, Plane, Scope, Select, Synchronization, Variable};
+use crate::{
+    cpa,
+    ir::{Elem, UIntKind},
+    prelude::AtomicOp,
+};
 use serde::{Deserialize, Serialize};
 
 /// All operations that can be used in a GPU compute shader.
@@ -358,6 +361,36 @@ pub struct FmaOperator {
     pub c: Variable,
 }
 
+#[allow(missing_docs)]
+pub struct CheckedIndexAssign {
+    pub lhs: Variable,
+    pub rhs: Variable,
+    pub out: Variable,
+}
+
+impl CheckedIndexAssign {
+    #[allow(missing_docs)]
+    pub fn expand(self, scope: &mut Scope) {
+        let lhs = self.lhs;
+        let rhs = self.rhs;
+        let out = self.out;
+        let array_len = scope.create_local(Item::new(Elem::UInt(UIntKind::U32)));
+        let inside_bound = scope.create_local(Item::new(Elem::Bool));
+
+        if out.has_buffer_length() {
+            cpa!(scope, array_len = buffer_len(out));
+        } else {
+            cpa!(scope, array_len = len(out));
+        }
+
+        cpa!(scope, inside_bound = lhs < array_len);
+
+        cpa!(scope, if(inside_bound).then(|scope| {
+            cpa!(scope, unchecked(out[lhs]) = rhs);
+        }));
+    }
+}
+
 impl From<Operator> for Operation {
     fn from(val: Operator) -> Self {
         Operation::Operator(val)

diff --git a/crates/cubecl-core/src/ir/variable.rs b/crates/cubecl-core/src/ir/variable.rs
@@ -114,6 +114,22 @@ impl Variable {
         )
     }
 
+    pub fn has_length(&self) -> bool {
+        matches!(
+            self.kind,
+            VariableKind::GlobalInputArray { .. }
+                | VariableKind::GlobalOutputArray { .. }
+                | VariableKind::Slice { .. }
+        )
+    }
+
+    pub fn has_buffer_length(&self) -> bool {
+        matches!(
+            self.kind,
+            VariableKind::GlobalInputArray { .. } | VariableKind::GlobalOutputArray { .. }
+        )
+    }
+
     /// Determines if the value is a constant with the specified value (converted if necessary)
     pub fn is_constant(&self, value: i64) -> bool {
         match self.kind {

diff --git a/crates/cubecl-core/src/runtime_tests/index.rs b/crates/cubecl-core/src/runtime_tests/index.rs
@@ -0,0 +1,56 @@
+use crate as cubecl;
+
+use cubecl::prelude::*;
+
+#[cube(launch)]
+pub fn kernel_assign<F: Float>(output: &mut Array<F>) {
+    if UNIT_POS == 0 {
+        let item = F::new(5.0);
+        // Assign normally.
+        output[0] = item;
+
+        // out of bounds write should not show up in the array.
+        output[2] = F::new(10.0);
+
+        // out of bounds read should be read as 0.
+        output[1] = output[2];
+    }
+}
+
+pub fn test_kernel_index_scalar<R: Runtime, F: Float + CubeElement>(
+    client: ComputeClient<R::Server, R::Channel>,
+) {
+    let handle = client.create(F::as_bytes(&[F::new(0.0), F::new(1.0), F::new(123.0)]));
+    let handle_slice = handle.clone().offset_end(1);
+    let vectorization = 1;
+
+    kernel_assign::launch::<F, R>(
+        &client,
+        CubeCount::Static(1, 1, 1),
+        CubeDim::default(),
+        unsafe { ArrayArg::from_raw_parts::<F>(&handle_slice, 3, vectorization) },
+    );
+
+    let actual = client.read_one(handle.binding());
+    let actual = F::from_bytes(&actual);
+
+    assert_eq!(actual[0], F::new(5.0));
+    assert_eq!(actual[1], F::new(0.0));
+    assert_eq!(actual[2], F::new(123.0));
+}
+
+#[allow(missing_docs)]
+#[macro_export]
+macro_rules! testgen_index {
+    () => {
+        use super::*;
+
+        #[test]
+        fn test_assign_index() {
+            let client = TestRuntime::client(&Default::default());
+            cubecl_core::runtime_tests::index::test_kernel_index_scalar::<TestRuntime, FloatType>(
+                client,
+            );
+        }
+    };
+}
diff --git a/crates/cubecl-core/src/runtime_tests/mod.rs b/crates/cubecl-core/src/runtime_tests/mod.rs
@@ -5,6 +5,7 @@ pub mod cmma;
 pub mod const_match;
 pub mod constants;
 pub mod different_rank;
+pub mod index;
 pub mod launch;
 pub mod metadata;
 pub mod plane;
@@ -23,6 +24,7 @@ macro_rules! testgen_all {
         type IntType = i32;
         type UintType = u32;
 
+        cubecl_core::testgen_index!();
         cubecl_core::testgen_assign!();
         cubecl_core::testgen_branch!();
         cubecl_core::testgen_const_match!();

diff --git a/crates/cubecl-cpp/src/shared/base.rs b/crates/cubecl-cpp/src/shared/base.rs
@@ -1,8 +1,8 @@
 use std::hash::Hash;
 use std::{collections::HashSet, fmt::Debug, num::NonZero};
 
+use cubecl_core::ir::CheckedIndexAssign;
 use cubecl_core::{
-    cpa,
     ir::{self as gpu},
     prelude::CubePrimitive,
     Compiler, Feature,
@@ -522,14 +522,14 @@ impl<D: Dialect> CppCompiler<D> {
                 out: self.compile_variable(out),
             }),
             gpu::Operator::Index(op) => {
-                if matches!(self.strategy, ExecutionMode::Checked) && has_length(&op.lhs) {
+                if matches!(self.strategy, ExecutionMode::Checked) && op.lhs.has_length() {
                     let lhs = op.lhs;
                     let rhs = op.rhs;
                     let array_len = scope.create_local(gpu::Item::new(u32::as_elem()));
 
                     instructions.extend(self.compile_scope(scope));
 
-                    let length = match has_buffer_length(&lhs) {
+                    let length = match lhs.has_buffer_length() {
                         true => gpu::Metadata::BufferLength { var: lhs },
                         false => gpu::Metadata::Length { var: lhs },
                     };
@@ -550,7 +550,7 @@ impl<D: Dialect> CppCompiler<D> {
             }
             gpu::Operator::IndexAssign(op) => {
                 if let ExecutionMode::Checked = self.strategy {
-                    if has_length(&out) {
+                    if out.has_length() {
                         CheckedIndexAssign {
                             lhs: op.lhs,
                             rhs: op.rhs,
@@ -972,52 +972,6 @@ impl<D: Dialect> CppCompiler<D> {
     }
 }
 
-#[allow(missing_docs)]
-struct CheckedIndexAssign {
-    pub lhs: gpu::Variable,
-    pub rhs: gpu::Variable,
-    pub out: gpu::Variable,
-}
-
-impl CheckedIndexAssign {
-    #[allow(missing_docs)]
-    fn expand(self, scope: &mut gpu::Scope) {
-        let lhs = self.lhs;
-        let rhs = self.rhs;
-        let out = self.out;
-        let array_len = scope.create_local(gpu::Item::new(u32::as_elem()));
-        let inside_bound = scope.create_local(gpu::Item::new(gpu::Elem::Bool));
-
-        if has_buffer_length(&out) {
-            cpa!(scope, array_len = buffer_len(out));
-        } else {
-            cpa!(scope, array_len = len(out));
-        }
-
-        cpa!(scope, inside_bound = lhs < array_len);
-
-        cpa!(scope, if(inside_bound).then(|scope| {
-            cpa!(scope, unchecked(out[lhs]) = rhs);
-        }));
-    }
-}
-
-fn has_length(var: &gpu::Variable) -> bool {
-    matches!(
-        var.kind,
-        gpu::VariableKind::GlobalInputArray { .. }
-            | gpu::VariableKind::GlobalOutputArray { .. }
-            | gpu::VariableKind::Slice { .. }
-    )
-}
-
-fn has_buffer_length(var: &gpu::Variable) -> bool {
-    matches!(
-        var.kind,
-        gpu::VariableKind::GlobalInputArray { .. } | gpu::VariableKind::GlobalOutputArray { .. }
-    )
-}
-
 pub fn register_supported_types(props: &mut DeviceProperties<Feature>) {
     let supported_types = [
         gpu::Elem::UInt(gpu::UIntKind::U8),

diff --git a/crates/cubecl-wgpu/src/compiler/base.rs b/crates/cubecl-wgpu/src/compiler/base.rs
@@ -18,7 +18,6 @@ pub trait WgpuCompiler: Compiler {
     fn create_pipeline(
         server: &mut WgpuServer<Self>,
         kernel: CompiledKernel<Self>,
-        mode: ExecutionMode,
     ) -> Arc<ComputePipeline>;
 
     #[allow(async_fn_in_trait)]

diff --git a/crates/cubecl-wgpu/src/compiler/spirv.rs b/crates/cubecl-wgpu/src/compiler/spirv.rs
@@ -46,7 +46,6 @@ impl WgpuCompiler for SpirvCompiler<GLCompute> {
     fn create_pipeline(
         server: &mut WgpuServer<Self>,
         kernel: CompiledKernel<Self>,
-        mode: ExecutionMode,
     ) -> Arc<ComputePipeline> {
         let (module, layout) = kernel
             .repr
@@ -102,23 +101,20 @@ impl WgpuCompiler for SpirvCompiler<GLCompute> {
             })
             .unwrap_or_else(|| {
                 let source = &kernel.source;
-                let module = match mode {
-                    ExecutionMode::Checked => {
-                        server
-                            .device
-                            .create_shader_module(wgpu::ShaderModuleDescriptor {
-                                label: None,
-                                source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
-                            })
-                    }
-                    ExecutionMode::Unchecked => unsafe {
-                        server
-                            .device
-                            .create_shader_module_unchecked(wgpu::ShaderModuleDescriptor {
-                                label: None,
-                                source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
-                            })
-                    },
+                // Cube always in principle uses unchecked modules. Certain operations like
+                // indexing are instead checked by cube. The WebGPU specification only makes
+                // incredibly loose gaurantees that Cube can't rely on. Additionally, kernels
+                // can opt in/out per operation whether checks should be performed which can be faster.
+                //
+                // SAFETY: Cube gaurantees OOB safety when launching in checked mode. Launching in unchecked mode
+                // is only availble through the use of unsafe code.
+                let module = unsafe {
+                    server
+                        .device
+                        .create_shader_module_unchecked(wgpu::ShaderModuleDescriptor {
+                            label: None,
+                            source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
+                        })
                 };
                 (module, None)
             });