From d41dd0c00381fb935e4389927ee4bef0b8aba785 Mon Sep 17 00:00:00 2001 From: Nathaniel Simard Date: Sun, 25 Aug 2024 14:16:19 -0400 Subject: [PATCH] Use simple memory management with wasm (#81) --- crates/cubecl-core/src/codegen/integrator.rs | 5 ++ .../tests/error/for_loop_range.stderr | 2 +- .../src/codegen_function/branch.rs | 4 +- crates/cubecl-wgpu/Cargo.toml | 4 ++ crates/cubecl-wgpu/build.rs | 8 +++ .../cubecl-wgpu/src/compiler/wgsl/shader.rs | 5 ++ crates/cubecl-wgpu/src/runtime.rs | 60 ++++++++++++------- crates/cubecl/Cargo.toml | 1 + 8 files changed, 64 insertions(+), 25 deletions(-) create mode 100644 crates/cubecl-wgpu/build.rs diff --git a/crates/cubecl-core/src/codegen/integrator.rs b/crates/cubecl-core/src/codegen/integrator.rs index c55ee2d3..7bf25c23 100644 --- a/crates/cubecl-core/src/codegen/integrator.rs +++ b/crates/cubecl-core/src/codegen/integrator.rs @@ -495,6 +495,11 @@ impl KernelIntegrator { let output = match self.expansion.outputs.get_mut(mapping.pos_output) { Some(output) => output, None => { + if let Some(binding) = self.input_bindings.get_mut(mapping.pos_input) { + // Update input visibility. + binding.visibility = Visibility::ReadWrite; + } + // The mapping is handled differently, normally by cube itself. return; } diff --git a/crates/cubecl-core/tests/error/for_loop_range.stderr b/crates/cubecl-core/tests/error/for_loop_range.stderr index 947b5817..0a31e86c 100644 --- a/crates/cubecl-core/tests/error/for_loop_range.stderr +++ b/crates/cubecl-core/tests/error/for_loop_range.stderr @@ -1,4 +1,4 @@ -error: Invalid for loop: use [range](cubecl::prelude::range] instead. +error: Invalid for loop: use [range](cubecl::prelude::range] or [range_stepped](cubecl::prelude::range_stepped) instead. --> tests/error/for_loop_range.rs:6:14 | 6 | for _ in 0..10 {} diff --git a/crates/cubecl-macros/src/codegen_function/branch.rs b/crates/cubecl-macros/src/codegen_function/branch.rs index bdadef0b..0305aff5 100644 --- a/crates/cubecl-macros/src/codegen_function/branch.rs +++ b/crates/cubecl-macros/src/codegen_function/branch.rs @@ -15,11 +15,11 @@ use super::{ /// Codegen of for loops /// Supports range: -/// ```norun +/// ```ignore /// for i in range(start, end, unroll) {...} /// ``` /// and range_stepped: -/// ```norun +/// ```ignore /// for i in range_stepped(start, end, step, unroll) {...} /// ``` pub(crate) fn codegen_for_loop( diff --git a/crates/cubecl-wgpu/Cargo.toml b/crates/cubecl-wgpu/Cargo.toml index 88bcbeb7..86d4e5c5 100644 --- a/crates/cubecl-wgpu/Cargo.toml +++ b/crates/cubecl-wgpu/Cargo.toml @@ -17,6 +17,7 @@ default = [ "cubecl-core/default", ] std = ["cubecl-runtime/std", "cubecl-common/std", "cubecl-core/std"] +simple-memory-management = [] [dependencies] cubecl-runtime = { path = "../cubecl-runtime", version = "0.1.1", default-features = false, features = [ @@ -41,3 +42,6 @@ cubecl-core = { path = "../cubecl-core", version = "0.1.1", features = [ cubecl-linalg = { path = "../cubecl-linalg", version = "0.1.1", features = [ "export_tests", ] } + +[build-dependencies] +cfg_aliases = "0.2.1" diff --git a/crates/cubecl-wgpu/build.rs b/crates/cubecl-wgpu/build.rs new file mode 100644 index 00000000..c28f0edd --- /dev/null +++ b/crates/cubecl-wgpu/build.rs @@ -0,0 +1,8 @@ +use cfg_aliases::cfg_aliases; + +fn main() { + // Setup cfg aliases + cfg_aliases! { + simple_memory_management: { any(feature = "simple-memory-management", target_family = "wasm") }, + } +} diff --git a/crates/cubecl-wgpu/src/compiler/wgsl/shader.rs b/crates/cubecl-wgpu/src/compiler/wgsl/shader.rs index a43bd0cb..cf42386d 100644 --- a/crates/cubecl-wgpu/src/compiler/wgsl/shader.rs +++ b/crates/cubecl-wgpu/src/compiler/wgsl/shader.rs @@ -228,7 +228,12 @@ impl Display for Location { impl Display for Visibility { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + // With the dynamic memory strategy we have to put everything read_write. + #[cfg(not(simple_memory_management))] Visibility::Read => f.write_str("read_write"), + // With the simple memory strategy we can use the correct visibility. + #[cfg(simple_memory_management)] + Visibility::Read => f.write_str("read"), Visibility::ReadWrite => f.write_str("read_write"), } } diff --git a/crates/cubecl-wgpu/src/runtime.rs b/crates/cubecl-wgpu/src/runtime.rs index df935910..450cde12 100644 --- a/crates/cubecl-wgpu/src/runtime.rs +++ b/crates/cubecl-wgpu/src/runtime.rs @@ -5,13 +5,9 @@ use crate::{ }; use alloc::sync::Arc; use cubecl_core::{Feature, FeatureSet, Runtime}; -use cubecl_runtime::{ - channel::MutexComputeChannel, - client::ComputeClient, - memory_management::dynamic::{DynamicMemoryManagement, DynamicMemoryManagementOptions}, - ComputeRuntime, -}; -use wgpu::DeviceDescriptor; +use cubecl_runtime::memory_management; +use cubecl_runtime::{channel::MutexComputeChannel, client::ComputeClient, ComputeRuntime}; +use wgpu::{DeviceDescriptor, Limits}; /// Runtime that uses the [wgpu] crate with the wgsl compiler. This is used in the Wgpu backend. /// For advanced configuration, use [`init_sync`] to pass in runtime options or to select a @@ -23,13 +19,42 @@ pub struct WgpuRuntime; static RUNTIME: ComputeRuntime> = ComputeRuntime::new(); -type Server = WgpuServer>; +type Server = WgpuServer; + +#[cfg(not(simple_memory_management))] +type MemoryManagement = memory_management::dynamic::DynamicMemoryManagement; +#[cfg(simple_memory_management)] +type MemoryManagement = memory_management::simple::SimpleMemoryManagement; + +#[cfg(not(simple_memory_management))] +fn init_memory_management(device: Arc, limits: &Limits) -> MemoryManagement { + let storage = WgpuStorage::new(device.clone()); + + memory_management::dynamic::DynamicMemoryManagement::new( + storage, + memory_management::dynamic::DynamicMemoryManagementOptions::preset( + limits.max_storage_buffer_binding_size as usize, + limits.min_storage_buffer_offset_alignment as usize, + ), + ) +} + +#[cfg(simple_memory_management)] +fn init_memory_management(device: Arc, _limits: &Limits) -> MemoryManagement { + let storage = WgpuStorage::new(device.clone()); + + memory_management::simple::SimpleMemoryManagement::new( + storage, + memory_management::simple::DeallocStrategy::new_period_tick(32), + memory_management::simple::SliceStrategy::Ratio(0.8), + ) +} impl Runtime for WgpuRuntime { type Compiler = wgsl::WgslCompiler; - type Server = WgpuServer>; + type Server = WgpuServer; - type Channel = MutexComputeChannel>>; + type Channel = MutexComputeChannel>; type Device = WgpuDevice; fn client(device: &Self::Device) -> ComputeClient { @@ -112,19 +137,10 @@ fn create_client( device_wgpu: Arc, queue: Arc, options: RuntimeOptions, -) -> ComputeClient< - WgpuServer>, - MutexComputeChannel>>, -> { +) -> ComputeClient, MutexComputeChannel>> +{ let limits = device_wgpu.limits(); - let storage = WgpuStorage::new(device_wgpu.clone()); - let memory_management = DynamicMemoryManagement::new( - storage, - DynamicMemoryManagementOptions::preset( - limits.max_storage_buffer_binding_size as usize, - limits.min_storage_buffer_offset_alignment as usize, - ), - ); + let memory_management = init_memory_management(device_wgpu.clone(), &limits); let server = WgpuServer::new(memory_management, device_wgpu, queue, options.tasks_max); let channel = MutexComputeChannel::new(server); diff --git a/crates/cubecl/Cargo.toml b/crates/cubecl/Cargo.toml index 3bed4202..45b6a0fa 100644 --- a/crates/cubecl/Cargo.toml +++ b/crates/cubecl/Cargo.toml @@ -22,6 +22,7 @@ default = ["std", "linalg", "cubecl-core/default", "cubecl-wgpu?/default", "cube std = ["cubecl-core/std", "cubecl-wgpu?/std", "cubecl-cuda?/std"] template = ["cubecl-core/template"] linalg = ["dep:cubecl-linalg"] +simple-memory-management = ["cubecl-wgpu?/simple-memory-management"] # Runtimes wgpu = ["cubecl-wgpu"]