Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consistent OOB behaviour for wgpu #296

Merged
merged 7 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions crates/cubecl-core/src/ir/operation.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use std::fmt::Display;

use crate::prelude::AtomicOp;

use super::{Branch, CoopMma, Item, Plane, Select, Synchronization, Variable};
use super::{Branch, CoopMma, Item, Plane, Scope, Select, Synchronization, Variable};
use crate::{
cpa,
ir::{Elem, UIntKind},
prelude::AtomicOp,
};
use serde::{Deserialize, Serialize};

/// All operations that can be used in a GPU compute shader.
Expand Down Expand Up @@ -358,6 +361,36 @@ pub struct FmaOperator {
pub c: Variable,
}

#[allow(missing_docs)]
pub struct CheckedIndexAssign {
pub lhs: Variable,
pub rhs: Variable,
pub out: Variable,
}

impl CheckedIndexAssign {
#[allow(missing_docs)]
pub fn expand(self, scope: &mut Scope) {
let lhs = self.lhs;
let rhs = self.rhs;
let out = self.out;
let array_len = scope.create_local(Item::new(Elem::UInt(UIntKind::U32)));
let inside_bound = scope.create_local(Item::new(Elem::Bool));

if out.has_buffer_length() {
cpa!(scope, array_len = buffer_len(out));
} else {
cpa!(scope, array_len = len(out));
}

cpa!(scope, inside_bound = lhs < array_len);

cpa!(scope, if(inside_bound).then(|scope| {
cpa!(scope, unchecked(out[lhs]) = rhs);
}));
}
}

impl From<Operator> for Operation {
fn from(val: Operator) -> Self {
Operation::Operator(val)
Expand Down
16 changes: 16 additions & 0 deletions crates/cubecl-core/src/ir/variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,22 @@ impl Variable {
)
}

pub fn has_length(&self) -> bool {
matches!(
self.kind,
VariableKind::GlobalInputArray { .. }
| VariableKind::GlobalOutputArray { .. }
| VariableKind::Slice { .. }
)
}

pub fn has_buffer_length(&self) -> bool {
matches!(
self.kind,
VariableKind::GlobalInputArray { .. } | VariableKind::GlobalOutputArray { .. }
)
}

/// Determines if the value is a constant with the specified value (converted if necessary)
pub fn is_constant(&self, value: i64) -> bool {
match self.kind {
Expand Down
56 changes: 56 additions & 0 deletions crates/cubecl-core/src/runtime_tests/index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use crate as cubecl;

use cubecl::prelude::*;

#[cube(launch)]
pub fn kernel_assign<F: Float>(output: &mut Array<F>) {
if UNIT_POS == 0 {
let item = F::new(5.0);
// Assign normally.
output[0] = item;

// out of bounds write should not show up in the array.
output[2] = F::new(10.0);

// out of bounds read should be read as 0.
output[1] = output[2];
}
}

pub fn test_kernel_index_scalar<R: Runtime, F: Float + CubeElement>(
client: ComputeClient<R::Server, R::Channel>,
) {
let handle = client.create(F::as_bytes(&[F::new(0.0), F::new(1.0), F::new(123.0)]));
let handle_slice = handle.clone().offset_end(1);
let vectorization = 1;

kernel_assign::launch::<F, R>(
&client,
CubeCount::Static(1, 1, 1),
CubeDim::default(),
unsafe { ArrayArg::from_raw_parts::<F>(&handle_slice, 3, vectorization) },
);

let actual = client.read_one(handle.binding());
let actual = F::from_bytes(&actual);

assert_eq!(actual[0], F::new(5.0));
assert_eq!(actual[1], F::new(0.0));
assert_eq!(actual[2], F::new(123.0));
}

#[allow(missing_docs)]
#[macro_export]
macro_rules! testgen_index {
() => {
use super::*;

#[test]
fn test_assign_index() {
let client = TestRuntime::client(&Default::default());
cubecl_core::runtime_tests::index::test_kernel_index_scalar::<TestRuntime, FloatType>(
client,
);
}
};
}
2 changes: 2 additions & 0 deletions crates/cubecl-core/src/runtime_tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod cmma;
pub mod const_match;
pub mod constants;
pub mod different_rank;
pub mod index;
pub mod launch;
pub mod metadata;
pub mod plane;
Expand All @@ -23,6 +24,7 @@ macro_rules! testgen_all {
type IntType = i32;
type UintType = u32;

cubecl_core::testgen_index!();
cubecl_core::testgen_assign!();
cubecl_core::testgen_branch!();
cubecl_core::testgen_const_match!();
Expand Down
54 changes: 4 additions & 50 deletions crates/cubecl-cpp/src/shared/base.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::hash::Hash;
use std::{collections::HashSet, fmt::Debug, num::NonZero};

use cubecl_core::ir::CheckedIndexAssign;
use cubecl_core::{
cpa,
ir::{self as gpu},
prelude::CubePrimitive,
Compiler, Feature,
Expand Down Expand Up @@ -522,14 +522,14 @@ impl<D: Dialect> CppCompiler<D> {
out: self.compile_variable(out),
}),
gpu::Operator::Index(op) => {
if matches!(self.strategy, ExecutionMode::Checked) && has_length(&op.lhs) {
if matches!(self.strategy, ExecutionMode::Checked) && op.lhs.has_length() {
let lhs = op.lhs;
let rhs = op.rhs;
let array_len = scope.create_local(gpu::Item::new(u32::as_elem()));

instructions.extend(self.compile_scope(scope));

let length = match has_buffer_length(&lhs) {
let length = match lhs.has_buffer_length() {
true => gpu::Metadata::BufferLength { var: lhs },
false => gpu::Metadata::Length { var: lhs },
};
Expand All @@ -550,7 +550,7 @@ impl<D: Dialect> CppCompiler<D> {
}
gpu::Operator::IndexAssign(op) => {
if let ExecutionMode::Checked = self.strategy {
if has_length(&out) {
if out.has_length() {
CheckedIndexAssign {
lhs: op.lhs,
rhs: op.rhs,
Expand Down Expand Up @@ -972,52 +972,6 @@ impl<D: Dialect> CppCompiler<D> {
}
}

#[allow(missing_docs)]
struct CheckedIndexAssign {
pub lhs: gpu::Variable,
pub rhs: gpu::Variable,
pub out: gpu::Variable,
}

impl CheckedIndexAssign {
#[allow(missing_docs)]
fn expand(self, scope: &mut gpu::Scope) {
let lhs = self.lhs;
let rhs = self.rhs;
let out = self.out;
let array_len = scope.create_local(gpu::Item::new(u32::as_elem()));
let inside_bound = scope.create_local(gpu::Item::new(gpu::Elem::Bool));

if has_buffer_length(&out) {
cpa!(scope, array_len = buffer_len(out));
} else {
cpa!(scope, array_len = len(out));
}

cpa!(scope, inside_bound = lhs < array_len);

cpa!(scope, if(inside_bound).then(|scope| {
cpa!(scope, unchecked(out[lhs]) = rhs);
}));
}
}

fn has_length(var: &gpu::Variable) -> bool {
matches!(
var.kind,
gpu::VariableKind::GlobalInputArray { .. }
| gpu::VariableKind::GlobalOutputArray { .. }
| gpu::VariableKind::Slice { .. }
)
}

fn has_buffer_length(var: &gpu::Variable) -> bool {
matches!(
var.kind,
gpu::VariableKind::GlobalInputArray { .. } | gpu::VariableKind::GlobalOutputArray { .. }
)
}

pub fn register_supported_types(props: &mut DeviceProperties<Feature>) {
let supported_types = [
gpu::Elem::UInt(gpu::UIntKind::U8),
Expand Down
1 change: 0 additions & 1 deletion crates/cubecl-wgpu/src/compiler/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ pub trait WgpuCompiler: Compiler {
fn create_pipeline(
server: &mut WgpuServer<Self>,
kernel: CompiledKernel<Self>,
mode: ExecutionMode,
) -> Arc<ComputePipeline>;

#[allow(async_fn_in_trait)]
Expand Down
32 changes: 14 additions & 18 deletions crates/cubecl-wgpu/src/compiler/spirv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ impl WgpuCompiler for SpirvCompiler<GLCompute> {
fn create_pipeline(
server: &mut WgpuServer<Self>,
kernel: CompiledKernel<Self>,
mode: ExecutionMode,
) -> Arc<ComputePipeline> {
let (module, layout) = kernel
.repr
Expand Down Expand Up @@ -102,23 +101,20 @@ impl WgpuCompiler for SpirvCompiler<GLCompute> {
})
.unwrap_or_else(|| {
let source = &kernel.source;
let module = match mode {
ExecutionMode::Checked => {
server
.device
.create_shader_module(wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
})
}
ExecutionMode::Unchecked => unsafe {
server
.device
.create_shader_module_unchecked(wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
})
},
// Cube always in principle uses unchecked modules. Certain operations like
// indexing are instead checked by cube. The WebGPU specification only makes
// incredibly loose gaurantees that Cube can't rely on. Additionally, kernels
// can opt in/out per operation whether checks should be performed which can be faster.
//
// SAFETY: Cube gaurantees OOB safety when launching in checked mode. Launching in unchecked mode
// is only availble through the use of unsafe code.
let module = unsafe {
server
.device
.create_shader_module_unchecked(wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
})
};
(module, None)
});
Expand Down
Loading