From 1ef0541ff94f258ecf028db141779f43d3a300d5 Mon Sep 17 00:00:00 2001 From: Xiaochun Tong Date: Sat, 23 Sep 2023 13:10:07 -0400 Subject: [PATCH] fixed leaking kernel... --- luisa_compute/src/runtime.rs | 26 +++++++++++++++++--------- luisa_compute/tests/misc.rs | 24 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/luisa_compute/src/runtime.rs b/luisa_compute/src/runtime.rs index 6d1ae63..68ebe8c 100644 --- a/luisa_compute/src/runtime.rs +++ b/luisa_compute/src/runtime.rs @@ -447,12 +447,12 @@ impl Device { ShaderArtifact::Sync(self.inner.create_shader(&module, &shader_options)) }; Kernel { - inner: RawKernel { + inner: Arc::new(RawKernel { device: self.clone(), artifact, module, resource_tracker: k.inner.resource_tracker.clone(), - }, + }), _marker: PhantomData {}, } } @@ -885,7 +885,12 @@ pub struct RawKernel { pub(crate) resource_tracker: ResourceTracker, pub(crate) module: CArc, } - +impl Drop for RawKernel { + fn drop(&mut self) { + let shader = self.unwrap(); + self.device.inner.destroy_shader(shader); + } +} pub struct CallableArgEncoder { pub(crate) args: Vec, } @@ -1088,12 +1093,13 @@ impl RawKernel { } pub fn dispatch_async( - &self, + self: &Arc, args: KernelArgEncoder, dispatch_size: [u32; 3], ) -> Command<'static> { let mut rt = ResourceTracker::new(); rt.add(Arc::new(args.uniform_data)); + rt.add(self.clone()); let args = args.args; let args = Arc::new(args); rt.add(args.clone()); @@ -1109,7 +1115,7 @@ impl RawKernel { callback: None, } } - pub fn dispatch(&self, args: KernelArgEncoder, dispatch_size: [u32; 3]) { + pub fn dispatch(self: &Arc, args: KernelArgEncoder, dispatch_size: [u32; 3]) { submit_default_stream_and_sync(&self.device, vec![self.dispatch_async(args, dispatch_size)]) } } @@ -1221,12 +1227,13 @@ pub struct KernelDef { /// Kernel creation can be done in multiple ways: /// - Seperate recording and compilation: /// ```no_run -//// // Recording: +/// // Recording: /// use luisa_compute::prelude::*; /// let ctx = Context::new(std::env::current_exe().unwrap()); /// let device = ctx.create_device("cpu"); /// let kernel = KernelDef::, Buffer, -/// Buffer)>::new(&device, track!(|a,b,c|{ })); // Compilation: +/// Buffer)>::new(&device, track!(|a,b,c|{ })); +/// // Compilation: /// let kernel = device.compile_kernel(&kernel); /// ``` /// - Recording and compilation in one step: @@ -1235,11 +1242,12 @@ pub struct KernelDef { /// let ctx = Context::new(std::env::current_exe().unwrap()); /// let device = ctx.create_device("cpu"); /// let kernel = Kernel::, Buffer, -/// Buffer)>::new(&device, track!(|a,b,c|{ })); ``` +/// Buffer)>::new(&device, track!(|a,b,c|{ })); +/// ``` /// - Asynchronous compilation use [`Kernel::::new_async`] /// - Custom build options using [`Kernel::::new_with_options`] pub struct Kernel { - pub(crate) inner: RawKernel, + pub(crate) inner: Arc, pub(crate) _marker: PhantomData, } unsafe impl Send for Kernel {} diff --git a/luisa_compute/tests/misc.rs b/luisa_compute/tests/misc.rs index a5d1f5a..ea120a3 100644 --- a/luisa_compute/tests/misc.rs +++ b/luisa_compute/tests/misc.rs @@ -1083,3 +1083,27 @@ fn dyn_callable() { assert_eq!(w_data[i], i as i32 + 1000 * i as i32); } } + +#[test] +fn dispatch_async() { + let device = get_device(); + let x = device.create_buffer::(1024); + x.fill_fn(|i| i as f32); + let kernel = Kernel::::new( + &device, + track!(|| { + for _ in 0..10000000 { + let buf_x = x.var(); + let tid = dispatch_id().x; + let x = buf_x.read(tid); + buf_x.write(tid, x + 1.0); + } + }), + ); + let s = device.default_stream().scope(); + s.submit([ + kernel.dispatch_async([1024, 1, 1]), + kernel.dispatch_async([1024, 1, 1]), + ]); + drop(kernel); +}