diff --git a/CHANGELOG.md b/CHANGELOG.md index d81c59e881..63894432f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,7 +49,7 @@ Bottom level categories: #### Metal -- Support 32-bit floating-point atomic operations in shaders. It requires Metal 3.0 or later with Apple 7, 8, 9 or Mac 2. By @AsherJingkongChen in [#6234](https://github.com/gfx-rs/wgpu/pull/6234). +- Support some 32-bit floating-point atomic operations in shaders. It requires Metal 3.0 or later with Apple 7, 8, 9 or Mac 2. By @AsherJingkongChen in [#6234](https://github.com/gfx-rs/wgpu/pull/6234) ### Bug Fixes diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 33f892aa45..0ed74bbcda 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -2455,51 +2455,100 @@ impl<'w> BlockContext<'w> { let value_inner = self.fun_info[value].ty.inner_with(&self.ir_module.types); let instruction = match *fun { - crate::AtomicFunction::Add => Instruction::atomic_binary( - spirv::Op::AtomicIAdd, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::Subtract => Instruction::atomic_binary( - spirv::Op::AtomicISub, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::And => Instruction::atomic_binary( - spirv::Op::AtomicAnd, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::InclusiveOr => Instruction::atomic_binary( - spirv::Op::AtomicOr, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::ExclusiveOr => Instruction::atomic_binary( - spirv::Op::AtomicXor, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), + crate::AtomicFunction::Add => { + let spirv_op = match *value_inner { + crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + width: _, + }) => spirv::Op::AtomicIAdd, + crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Float, + width: _, + }) => spirv::Op::AtomicFAddEXT, + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::Subtract => { + let spirv_op = match *value_inner { + crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + width: _, + }) => spirv::Op::AtomicISub, + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::And => { + let spirv_op = match *value_inner { + crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + width: _, + }) => spirv::Op::AtomicAnd, + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::InclusiveOr => { + let spirv_op = match *value_inner { + crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + width: _, + }) => spirv::Op::AtomicOr, + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::ExclusiveOr => { + let spirv_op = match *value_inner { + crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + width: _, + }) => spirv::Op::AtomicXor, + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } crate::AtomicFunction::Min => { let spirv_op = match *value_inner { crate::TypeInner::Scalar(crate::Scalar { diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index d1c1e82a20..5535824c78 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -881,6 +881,16 @@ impl Writer { crate::TypeInner::Atomic(crate::Scalar { width: 8, kind: _ }) => { self.require_any("64 bit integer atomics", &[spirv::Capability::Int64Atomics])?; } + crate::TypeInner::Atomic(crate::Scalar { + width: 4, + kind: crate::ScalarKind::Float, + }) => { + self.require_any( + "32 bit floating-point atomics", + &[spirv::Capability::AtomicFloat32AddEXT], + )?; + self.use_extension("SPV_EXT_shader_atomic_float_add"); + } _ => {} } Ok(()) diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index 2b859b6c9c..d162830b4a 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -455,12 +455,11 @@ impl super::Validator { // that that `Atomic` type has a permitted scalar width. if let crate::ScalarKind::Float = pointer_scalar.kind { // `Capabilities::SHADER_FLT32_ATOMIC` enables 32-bit floating-point - // atomic operations including `Add`, `Subtract`, and `Exchange` + // atomic operations including `Add` and `Exchange` // in storage address space. if !matches!( *fun, crate::AtomicFunction::Add - | crate::AtomicFunction::Subtract | crate::AtomicFunction::Exchange { compare: _ } ) { log::error!("Float32 atomic operation {:?} is not supported", fun); diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index 3cff36f386..38cad391f5 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -384,7 +384,7 @@ impl super::Validator { if width == 4 { if !self .capabilities - .intersects(Capabilities::SHADER_FLT32_ATOMIC) + .contains(Capabilities::SHADER_FLT32_ATOMIC) { return Err(TypeError::MissingCapability( Capabilities::SHADER_FLT32_ATOMIC, diff --git a/naga/tests/in/atomicOps-flt32.param.ron b/naga/tests/in/atomicOps-flt32.param.ron index e8cdbb4f04..13919f13ef 100644 --- a/naga/tests/in/atomicOps-flt32.param.ron +++ b/naga/tests/in/atomicOps-flt32.param.ron @@ -1,5 +1,9 @@ ( god_mode: true, + spv: ( + version: (1, 1), + capabilities: [ AtomicFloat32AddEXT ], + ), msl: ( lang_version: (3, 0), per_entry_point_map: {}, diff --git a/naga/tests/in/atomicOps-flt32.wgsl b/naga/tests/in/atomicOps-flt32.wgsl index 05d16c70ed..8302bba910 100644 --- a/naga/tests/in/atomicOps-flt32.wgsl +++ b/naga/tests/in/atomicOps-flt32.wgsl @@ -34,13 +34,6 @@ fn cs_main(@builtin(local_invocation_id) id: vec3) { workgroupBarrier(); - atomicSub(&storage_atomic_scalar, 1.0); - atomicSub(&storage_atomic_arr[1], 1.0); - atomicSub(&storage_struct.atomic_scalar, 1.0); - atomicSub(&storage_struct.atomic_arr[1], 1.0); - - workgroupBarrier(); - atomicExchange(&storage_atomic_scalar, 1.0); atomicExchange(&storage_atomic_arr[1], 1.0); atomicExchange(&storage_struct.atomic_scalar, 1.0); diff --git a/naga/tests/out/msl/atomicOps-flt32.msl b/naga/tests/out/msl/atomicOps-flt32.msl index 87661917dc..9d85f5f558 100644 --- a/naga/tests/out/msl/atomicOps-flt32.msl +++ b/naga/tests/out/msl/atomicOps-flt32.msl @@ -35,14 +35,9 @@ kernel void cs_main( float _e35 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); float _e40 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); - float _e43 = metal::atomic_fetch_sub_explicit(&storage_atomic_scalar, 1.0, metal::memory_order_relaxed); - float _e47 = metal::atomic_fetch_sub_explicit(&storage_atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); - float _e51 = metal::atomic_fetch_sub_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); - float _e56 = metal::atomic_fetch_sub_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); - metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); - float _e59 = metal::atomic_exchange_explicit(&storage_atomic_scalar, 1.0, metal::memory_order_relaxed); - float _e63 = metal::atomic_exchange_explicit(&storage_atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); - float _e67 = metal::atomic_exchange_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); - float _e72 = metal::atomic_exchange_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + float _e43 = metal::atomic_exchange_explicit(&storage_atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e47 = metal::atomic_exchange_explicit(&storage_atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + float _e51 = metal::atomic_exchange_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e56 = metal::atomic_exchange_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); return; } diff --git a/naga/tests/out/spv/atomicOps-flt32.spvasm b/naga/tests/out/spv/atomicOps-flt32.spvasm new file mode 100644 index 0000000000..580c15028c --- /dev/null +++ b/naga/tests/out/spv/atomicOps-flt32.spvasm @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 62 +OpCapability Shader +OpCapability AtomicFloat32AddEXT +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpExtension "SPV_EXT_shader_atomic_float_add" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %22 "cs_main" %19 +OpExecutionMode %22 LocalSize 2 1 1 +OpDecorate %4 ArrayStride 4 +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 4 +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 0 +OpDecorate %10 Block +OpMemberDecorate %10 0 Offset 0 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 1 +OpDecorate %13 Block +OpMemberDecorate %13 0 Offset 0 +OpDecorate %15 DescriptorSet 0 +OpDecorate %15 Binding 2 +OpDecorate %16 Block +OpMemberDecorate %16 0 Offset 0 +OpDecorate %19 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeFloat 32 +%6 = OpTypeInt 32 0 +%5 = OpConstant %6 2 +%4 = OpTypeArray %3 %5 +%7 = OpTypeStruct %3 %4 +%8 = OpTypeVector %6 3 +%10 = OpTypeStruct %3 +%11 = OpTypePointer StorageBuffer %10 +%9 = OpVariable %11 StorageBuffer +%13 = OpTypeStruct %4 +%14 = OpTypePointer StorageBuffer %13 +%12 = OpVariable %14 StorageBuffer +%16 = OpTypeStruct %7 +%17 = OpTypePointer StorageBuffer %16 +%15 = OpVariable %17 StorageBuffer +%20 = OpTypePointer Input %8 +%19 = OpVariable %20 Input +%23 = OpTypeFunction %2 +%24 = OpTypePointer StorageBuffer %3 +%25 = OpConstant %6 0 +%27 = OpTypePointer StorageBuffer %4 +%29 = OpTypePointer StorageBuffer %7 +%31 = OpConstant %3 1.0 +%34 = OpTypeInt 32 1 +%33 = OpConstant %34 1 +%35 = OpConstant %6 64 +%36 = OpConstant %6 1 +%40 = OpConstant %6 264 +%22 = OpFunction %2 None %23 +%18 = OpLabel +%21 = OpLoad %8 %19 +%26 = OpAccessChain %24 %9 %25 +%28 = OpAccessChain %27 %12 %25 +%30 = OpAccessChain %29 %15 %25 +OpBranch %32 +%32 = OpLabel +OpAtomicStore %26 %33 %35 %31 +%37 = OpAccessChain %24 %28 %36 +OpAtomicStore %37 %33 %35 %31 +%38 = OpAccessChain %24 %30 %25 +OpAtomicStore %38 %33 %35 %31 +%39 = OpAccessChain %24 %30 %36 %36 +OpAtomicStore %39 %33 %35 %31 +OpControlBarrier %5 %5 %40 +%41 = OpAtomicLoad %3 %26 %33 %35 +%42 = OpAccessChain %24 %28 %36 +%43 = OpAtomicLoad %3 %42 %33 %35 +%44 = OpAccessChain %24 %30 %25 +%45 = OpAtomicLoad %3 %44 %33 %35 +%46 = OpAccessChain %24 %30 %36 %36 +%47 = OpAtomicLoad %3 %46 %33 %35 +OpControlBarrier %5 %5 %40 +%48 = OpAtomicFAddEXT %3 %26 %33 %35 %31 +%50 = OpAccessChain %24 %28 %36 +%49 = OpAtomicFAddEXT %3 %50 %33 %35 %31 +%52 = OpAccessChain %24 %30 %25 +%51 = OpAtomicFAddEXT %3 %52 %33 %35 %31 +%54 = OpAccessChain %24 %30 %36 %36 +%53 = OpAtomicFAddEXT %3 %54 %33 %35 %31 +OpControlBarrier %5 %5 %40 +%55 = OpAtomicExchange %3 %26 %33 %35 %31 +%57 = OpAccessChain %24 %28 %36 +%56 = OpAtomicExchange %3 %57 %33 %35 %31 +%59 = OpAccessChain %24 %30 %25 +%58 = OpAtomicExchange %3 %59 %33 %35 %31 +%61 = OpAccessChain %24 %30 %36 %36 +%60 = OpAtomicExchange %3 %61 %33 %35 %31 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/atomicOps-flt32.wgsl b/naga/tests/out/wgsl/atomicOps-flt32.wgsl index b29f5f4f6d..df44839c36 100644 --- a/naga/tests/out/wgsl/atomicOps-flt32.wgsl +++ b/naga/tests/out/wgsl/atomicOps-flt32.wgsl @@ -27,14 +27,9 @@ fn cs_main(@builtin(local_invocation_id) id: vec3) { let _e35 = atomicAdd((&storage_struct.atomic_scalar), 1f); let _e40 = atomicAdd((&storage_struct.atomic_arr[1]), 1f); workgroupBarrier(); - let _e43 = atomicSub((&storage_atomic_scalar), 1f); - let _e47 = atomicSub((&storage_atomic_arr[1]), 1f); - let _e51 = atomicSub((&storage_struct.atomic_scalar), 1f); - let _e56 = atomicSub((&storage_struct.atomic_arr[1]), 1f); - workgroupBarrier(); - let _e59 = atomicExchange((&storage_atomic_scalar), 1f); - let _e63 = atomicExchange((&storage_atomic_arr[1]), 1f); - let _e67 = atomicExchange((&storage_struct.atomic_scalar), 1f); - let _e72 = atomicExchange((&storage_struct.atomic_arr[1]), 1f); + let _e43 = atomicExchange((&storage_atomic_scalar), 1f); + let _e47 = atomicExchange((&storage_atomic_arr[1]), 1f); + let _e51 = atomicExchange((&storage_struct.atomic_scalar), 1f); + let _e56 = atomicExchange((&storage_struct.atomic_arr[1]), 1f); return; } diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 01284daf72..ed74bda4c3 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -774,7 +774,10 @@ fn convert_wgsl() { "atomicOps-int64-min-max", Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, ), - ("atomicOps-flt32", Targets::METAL | Targets::WGSL), + ( + "atomicOps-flt32", + Targets::SPIRV | Targets::METAL | Targets::WGSL, + ), ( "atomicCompareExchange-int64", Targets::SPIRV | Targets::WGSL, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 1a89aa807a..569aa95c3e 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -109,6 +109,9 @@ pub struct PhysicalDeviceFeatures { /// Features provided by `VK_KHR_shader_atomic_int64`, promoted to Vulkan 1.2. shader_atomic_int64: Option>, + /// Features provided by `VK_EXT_shader_atomic_float`. + shader_atomic_float: Option>, + /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3. subgroup_size_control: Option>, } @@ -157,6 +160,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.shader_atomic_int64 { info = info.push_next(feature); } + if let Some(ref mut feature) = self.shader_atomic_float { + info = info.push_next(feature); + } if let Some(ref mut feature) = self.subgroup_size_control { info = info.push_next(feature); } @@ -440,6 +446,18 @@ impl PhysicalDeviceFeatures { } else { None }, + shader_atomic_float: if device_api_version >= vk::API_VERSION_1_1 + || enabled_extensions.contains(&ext::shader_atomic_float::NAME) + { + let needed = requested_features.contains(wgt::Features::SHADER_FLT32_ATOMIC); + Some( + vk::PhysicalDeviceShaderAtomicFloatFeaturesEXT::default() + .shader_buffer_float32_atomics(needed) + .shader_buffer_float32_atomic_add(needed), + ) + } else { + None + }, subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&ext::subgroup_size_control::NAME) { @@ -588,6 +606,14 @@ impl PhysicalDeviceFeatures { ); } + if let Some(ref shader_atomic_float) = self.shader_atomic_float { + features.set( + F::SHADER_FLT32_ATOMIC, + shader_atomic_float.shader_buffer_float32_atomics != 0 + && shader_atomic_float.shader_buffer_float32_atomic_add != 0, + ); + } + //if caps.supports_extension(khr::sampler_mirror_clamp_to_edge::NAME) { //if caps.supports_extension(ext::sampler_filter_minmax::NAME) { features.set( @@ -999,6 +1025,16 @@ impl PhysicalDeviceProperties { extensions.push(khr::shader_atomic_int64::NAME); } + // Require `VK_EXT_shader_atomic_float` if the associated feature was requested + if requested_features.contains(wgt::Features::SHADER_FLT32_ATOMIC) { + extensions.push(ext::shader_atomic_float::NAME); + } + + // Require VK_GOOGLE_display_timing if the associated feature was requested + if requested_features.contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING) { + extensions.push(google::display_timing::NAME); + } + extensions } @@ -1244,6 +1280,12 @@ impl super::InstanceShared { features2 = features2.push_next(next); } + if capabilities.supports_extension(ext::shader_atomic_float::NAME) { + let next = features + .shader_atomic_float + .insert(vk::PhysicalDeviceShaderAtomicFloatFeaturesEXT::default()); + features2 = features2.push_next(next); + } if capabilities.supports_extension(ext::image_robustness::NAME) { let next = features .image_robustness @@ -1735,6 +1777,10 @@ impl super::Adapter { capabilities.push(spv::Capability::Int64Atomics); } + if features.contains(wgt::Features::SHADER_FLT32_ATOMIC) { + capabilities.push(spv::Capability::AtomicFloat32AddEXT); + } + let mut flags = spv::WriterFlags::empty(); flags.set( spv::WriterFlags::DEBUG, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index dbeabce50a..226bee174a 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -935,10 +935,11 @@ bitflags::bitflags! { /// /// This is a native only feature. const SHADER_INT64_ATOMIC_ALL_OPS = 1 << 61; - /// Allows shaders to use all f32 atomic operations. + /// Allows shaders to use some f32 atomic operations: /// /// Supported platforms: /// - Metal (with MSL 3.0+) + /// - Vulkan (with VK_EXT_shader_atomic_float) /// /// This is a native only feature. const SHADER_FLT32_ATOMIC = 1 << 62;