diff --git a/CHANGELOG.md b/CHANGELOG.md index a0a7089..cc1cd24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added + +- Support for separate image samplers (`SamplerState` in HLSL, `sampler` in GLSL) + ### Changed - Updated `egui` to v0.26 diff --git a/Cargo.toml b/Cargo.toml index 26074a1..ad959da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,8 +41,10 @@ ash-molten = "0.17" anyhow = "1.0" bmfont = { version = "0.3", default-features = false } bytemuck = "1.14" +clap = { version = "4.5", features = ["derive"] } glam = { version = "0.25", features = ["bytemuck"] } half = { version = "2.3", features = ["bytemuck"] } +hassle-rs = "0.11" image = "0.24" inline-spirv = "0.2" log = "0.4" diff --git a/contrib/rel-mgmt/run-all-examples b/contrib/rel-mgmt/run-all-examples index 994a2c3..98afd74 100755 --- a/contrib/rel-mgmt/run-all-examples +++ b/contrib/rel-mgmt/run-all-examples @@ -22,6 +22,9 @@ cargo run --example cpu_readback cargo run --example subgroup_ops cargo run --example bindless cargo run --example image_sampler +cargo run --example image_sampler -- --hlsl +cargo run --example image_sampler -- --separate +cargo run --example image_sampler -- --hlsl --separate cargo run --example vertex_layout cargo run --example font_bmp cargo run --example egui diff --git a/examples/image_sampler.rs b/examples/image_sampler.rs index 2b96778..e31c3a2 100644 --- a/examples/image_sampler.rs +++ b/examples/image_sampler.rs @@ -1,6 +1,8 @@ mod profile_with_puffin; use { + clap::Parser, + hassle_rs::compile_hlsl, inline_spirv::inline_spirv, screen_13::prelude::*, std::{ @@ -16,6 +18,12 @@ use { /// instead use use name suffixes such as _llr or _nne for linear/linear repeat or nearest/nearest /// clamp-to-edge. 
/// +/// You may run this example program with either --hlsl or --separate arguments as follows: +/// +/// cargo run --example image_sampler -- --hlsl --separate +/// +/// Run with --help for more information. +/// /// See min_max.rs for more advanced image sampler usage. fn main() -> anyhow::Result<()> { pretty_env_logger::init(); @@ -74,6 +82,114 @@ fn create_pipeline( device: &Arc, sampler_info: impl Into, ) -> anyhow::Result> { + let args = Args::parse(); + + let mut frag_shader = match (args.hlsl, args.separate) { + (true, true) => { + // HLSL separate image sampler + Shader::new_fragment( + inline_spirv!( + r#" + struct FullscreenVertexOutput + { + float4 position : SV_Position; + [[vk::location(0)]] float2 uv : TEXCOORD0; + }; + + [[vk::binding(0, 0)]] Texture2D screenTexture : register(t0); + [[vk::binding(1, 0)]] SamplerState textureSampler : register(s0); + + float4 main(FullscreenVertexOutput input) + : SV_Target + { + return screenTexture.Sample(textureSampler, input.uv); + } + "#, + frag, + hlsl + ) + .as_slice(), + ) + } + (true, false) => { + // HLSL combined image sampler: inline_spirv uses shaderc which does not support this, so + // we are using hassle_rs which uses dxc. 
You must follow the instructions listed here to + // use hassle_rs: + // See: https://github.com/Traverse-Research/hassle-rs + // See: https://github.com/microsoft/DirectXShaderCompiler/wiki/Vulkan-combined-image-sampler-type + // See: https://github.com/google/shaderc/issues/1310 + Shader::new_fragment( + compile_hlsl( + "fragment.hlsl", + r#" + struct FullscreenVertexOutput + { + float4 position : SV_Position; + [[vk::location(0)]] float2 uv : TEXCOORD0; + }; + + [[vk::combinedImageSampler]][[vk::binding(0, 0)]] Texture2D screenTexture : register(t0); + [[vk::combinedImageSampler]][[vk::binding(0, 0)]] SamplerState textureSampler : register(s0); + + float4 main(FullscreenVertexOutput input) + : SV_Target + { + return screenTexture.Sample(textureSampler, input.uv); + } + "#, + "main", "ps_5_0", &["-spirv"], &[], + )? + .as_slice(), + ) + } + (false, true) => { + // GLSL separate image sampler + Shader::new_fragment( + inline_spirv!( + r#" + #version 460 core + + layout(binding = 0) uniform texture2D image; + layout(binding = 1) uniform sampler image_sampler; + layout(location = 0) in vec2 vk_TexCoord; + layout(location = 0) out vec4 vk_Color; + + void main() { + vk_Color = texture(sampler2D(image, image_sampler), vk_TexCoord); + } + "#, + frag + ) + .as_slice(), + ) + } + (false, false) => { + // GLSL combined image sampler + Shader::new_fragment( + inline_spirv!( + r#" + #version 460 core + + layout(binding = 0) uniform sampler2D image; + layout(location = 0) in vec2 vk_TexCoord; + layout(location = 0) out vec4 vk_Color; + + void main() { + vk_Color = texture(image, vk_TexCoord); + } + "#, + frag + ) + .as_slice(), + ) + } + }; + + // Use the builder pattern to specify an image sampler at the combined binding index (0) or + // separate binding index (1). 
+ let sampler_binding = args.separate as u32; + frag_shader = frag_shader.image_sampler(sampler_binding, sampler_info); + Ok(Arc::new(GraphicPipeline::create( device, GraphicPipelineInfo::default(), @@ -100,24 +216,7 @@ fn create_pipeline( ) .as_slice(), ), - Shader::new_fragment( - inline_spirv!( - r#" - #version 460 core - - layout(binding = 0) uniform sampler2D image; - layout(location = 0) in vec2 vk_TexCoord; - layout(location = 0) out vec4 vk_Color; - - void main() { - vk_Color = texture(image, vk_TexCoord); - } - "#, - frag - ) - .as_slice(), - ) - .image_sampler(0, sampler_info), + frag_shader, ], )?)) } @@ -153,3 +252,15 @@ fn read_image(device: &Arc, path: impl AsRef) -> anyhow::Result 0 { + pool_sizes[pool_size_count] = vk::DescriptorPoolSize { + ty: vk::DescriptorType::SAMPLER, + descriptor_count: info.sampler_count, + }; + pool_size_count += 1; + } + if info.storage_buffer_count > 0 { pool_sizes[pool_size_count] = vk::DescriptorPoolSize { ty: vk::DescriptorType::STORAGE_BUFFER, @@ -214,6 +222,7 @@ pub struct DescriptorPoolInfo { pub input_attachment_count: u32, pub max_sets: u32, pub sampled_image_count: u32, + pub sampler_count: u32, pub storage_buffer_count: u32, pub storage_buffer_dynamic_count: u32, pub storage_image_count: u32, @@ -229,6 +238,7 @@ impl DescriptorPoolInfo { + self.combined_image_sampler_count + self.input_attachment_count + self.sampled_image_count + + self.sampler_count + self.storage_buffer_count + self.storage_buffer_dynamic_count + self.storage_image_count diff --git a/src/driver/graphic.rs b/src/driver/graphic.rs index 9c9f603..d91e536 100644 --- a/src/driver/graphic.rs +++ b/src/driver/graphic.rs @@ -5,8 +5,11 @@ use { device::Device, image::SampleCount, merge_push_constant_ranges, - shader::{DescriptorBindingMap, PipelineDescriptorInfo, Shader, SpecializationInfo}, - DriverError, + shader::{ + DescriptorBindingMap, DescriptorInfo, PipelineDescriptorInfo, Shader, + SpecializationInfo, + }, + DescriptorBinding, DriverError, }, 
ash::vk, derive_builder::{Builder, UninitializedFieldError}, @@ -363,6 +366,7 @@ pub struct GraphicPipeline { pub name: Option, pub(crate) push_constants: Vec, + pub(crate) separate_samplers: Box<[DescriptorBinding]>, pub(crate) shader_modules: Vec, pub(super) state: GraphicPipelineState, } @@ -459,6 +463,13 @@ impl GraphicPipeline { } } + let separate_samplers = descriptor_bindings + .iter() + .filter_map(|(&descriptor_binding, (descriptor_info, _))| { + matches!(descriptor_info, DescriptorInfo::Sampler(..)).then_some(descriptor_binding) + }) + .collect(); + let descriptor_info = PipelineDescriptorInfo::create(&device, &descriptor_bindings)?; let descriptor_sets_layouts = descriptor_info .layouts @@ -560,6 +571,7 @@ impl GraphicPipeline { layout, name: None, push_constants, + separate_samplers, shader_modules, state: GraphicPipelineState { layout, diff --git a/src/driver/shader.rs b/src/driver/shader.rs index f1f2408..dc99f20 100644 --- a/src/driver/shader.rs +++ b/src/driver/shader.rs @@ -105,7 +105,7 @@ pub(crate) enum DescriptorInfo { CombinedImageSampler(u32, Sampler, bool), //count, sampler, is-manually-defined? InputAttachment(u32, u32), //count, input index, SampledImage(u32), - Sampler(u32), + Sampler(u32, Sampler, bool), //count, sampler, is-manually-defined? StorageBuffer(u32), StorageImage(u32), StorageTexelBuffer(u32), @@ -120,7 +120,7 @@ impl DescriptorInfo { Self::CombinedImageSampler(binding_count, ..) => binding_count, Self::InputAttachment(binding_count, _) => binding_count, Self::SampledImage(binding_count) => binding_count, - Self::Sampler(binding_count) => binding_count, + Self::Sampler(binding_count, ..) => binding_count, Self::StorageBuffer(binding_count) => binding_count, Self::StorageImage(binding_count) => binding_count, Self::StorageTexelBuffer(binding_count) => binding_count, @@ -135,7 +135,7 @@ impl DescriptorInfo { Self::CombinedImageSampler(..) => vk::DescriptorType::COMBINED_IMAGE_SAMPLER, Self::InputAttachment(..) 
=> vk::DescriptorType::INPUT_ATTACHMENT, Self::SampledImage(_) => vk::DescriptorType::SAMPLED_IMAGE, - Self::Sampler(_) => vk::DescriptorType::SAMPLER, + Self::Sampler(..) => vk::DescriptorType::SAMPLER, Self::StorageBuffer(_) => vk::DescriptorType::STORAGE_BUFFER, Self::StorageImage(_) => vk::DescriptorType::STORAGE_IMAGE, Self::StorageTexelBuffer(_) => vk::DescriptorType::STORAGE_TEXEL_BUFFER, @@ -146,7 +146,9 @@ impl DescriptorInfo { pub fn sampler(&self) -> Option<&Sampler> { match self { - Self::CombinedImageSampler(_, sampler, _) => Some(sampler), + Self::CombinedImageSampler(_, sampler, _) | Self::Sampler(_, sampler, _) => { + Some(sampler) + } _ => None, } } @@ -157,7 +159,7 @@ impl DescriptorInfo { Self::CombinedImageSampler(binding_count, ..) => binding_count, Self::InputAttachment(binding_count, _) => binding_count, Self::SampledImage(binding_count) => binding_count, - Self::Sampler(binding_count) => binding_count, + Self::Sampler(binding_count, ..) => binding_count, Self::StorageBuffer(binding_count) => binding_count, Self::StorageImage(binding_count) => binding_count, Self::StorageTexelBuffer(binding_count) => binding_count, @@ -856,15 +858,20 @@ impl Shader { desc_ty, nbind, .. 
- } => Some((name, desc_bind, desc_ty, *nbind)), + } => Some(( + name, + DescriptorBinding(desc_bind.set(), desc_bind.bind()), + desc_ty, + *nbind, + )), _ => None, }) { trace!( "binding {}: {}.{} = {:?}[{}]", name.as_deref().unwrap_or_default(), - binding.set(), - binding.bind(), + binding.0, + binding.1, *desc_ty, binding_count ); @@ -874,17 +881,8 @@ impl Shader { DescriptorInfo::AccelerationStructure(binding_count) } DescriptorType::CombinedImageSampler() => { - let (sampler_info, is_manually_defined) = self - .image_samplers - .get(&DescriptorBinding(binding.set(), binding.bind())) - .copied() - .map(|sampler_info| (sampler_info, true)) - .unwrap_or_else(|| { - ( - guess_immutable_sampler(name.as_deref().unwrap_or_default()), - false, - ) - }); + let (sampler_info, is_manually_defined) = + self.image_sampler(binding, name.as_deref().unwrap_or_default()); DescriptorInfo::CombinedImageSampler( binding_count, @@ -896,7 +894,16 @@ impl Shader { DescriptorInfo::InputAttachment(binding_count, *attachment) } DescriptorType::SampledImage() => DescriptorInfo::SampledImage(binding_count), - DescriptorType::Sampler() => DescriptorInfo::Sampler(binding_count), + DescriptorType::Sampler() => { + let (sampler_info, is_manually_defined) = + self.image_sampler(binding, name.as_deref().unwrap_or_default()); + + DescriptorInfo::Sampler( + binding_count, + Sampler::create(device, sampler_info)?, + is_manually_defined, + ) + } DescriptorType::StorageBuffer(_access_ty) => { DescriptorInfo::StorageBuffer(binding_count) } @@ -911,15 +918,20 @@ impl Shader { DescriptorInfo::UniformTexelBuffer(binding_count) } }; - res.insert( - DescriptorBinding(binding.set(), binding.bind()), - (descriptor_info, self.stage), - ); + res.insert(binding, (descriptor_info, self.stage)); } Ok(res) } + fn image_sampler(&self, binding: DescriptorBinding, name: &str) -> (SamplerInfo, bool) { + self.image_samplers + .get(&binding) + .copied() + .map(|sampler_info| (sampler_info, true)) + .unwrap_or_else(|| 
(guess_immutable_sampler(name), false)) + } + #[profiling::function] pub(super) fn merge_descriptor_bindings( descriptor_bindings: impl IntoIterator, @@ -970,8 +982,16 @@ impl Shader { return false; } } - DescriptorInfo::Sampler(lhs) => { - if let DescriptorInfo::Sampler(rhs) = rhs { + DescriptorInfo::Sampler(lhs, lhs_sampler, lhs_is_manually_defined) => { + if let DescriptorInfo::Sampler(rhs, rhs_sampler, rhs_is_manually_defined) = rhs + { + // Allow one of the samplers to be manually defined (only one!) + if *lhs_is_manually_defined && rhs_is_manually_defined { + return false; + } else if rhs_is_manually_defined { + *lhs_sampler = rhs_sampler; + } + (lhs, rhs) } else { return false; diff --git a/src/graph/resolver.rs b/src/graph/resolver.rs index 731ef12..69c5647 100644 --- a/src/graph/resolver.rs +++ b/src/graph/resolver.rs @@ -770,10 +770,10 @@ impl Resolver { // Find the total count of descriptors per type (there may be multiple pipelines!) for pool_sizes in pass.descriptor_pools_sizes() { for pool_size in pool_sizes.values() { - for (descriptor_ty, descriptor_count) in pool_size { - debug_assert_ne!(*descriptor_count, 0); + for (&descriptor_ty, &descriptor_count) in pool_size { + debug_assert_ne!(descriptor_count, 0); - match *descriptor_ty { + match descriptor_ty { vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => { info.acceleration_structure_count += descriptor_count; } @@ -786,6 +786,9 @@ impl Resolver { vk::DescriptorType::SAMPLED_IMAGE => { info.sampled_image_count += descriptor_count; } + vk::DescriptorType::SAMPLER => { + info.sampler_count += descriptor_count; + } vk::DescriptorType::STORAGE_BUFFER => { info.storage_buffer_count += descriptor_count; } @@ -807,7 +810,7 @@ impl Resolver { vk::DescriptorType::UNIFORM_TEXEL_BUFFER => { info.uniform_texel_buffer_count += descriptor_count; } - _ => unimplemented!(), + _ => unimplemented!("{descriptor_ty:?}"), }; } } @@ -824,6 +827,7 @@ impl Resolver { info.combined_image_sampler_count = 
align_up(info.combined_image_sampler_count, ATOM); info.input_attachment_count = align_up(info.input_attachment_count, ATOM); info.sampled_image_count = align_up(info.sampled_image_count, ATOM); + info.sampler_count = align_up(info.sampler_count, ATOM); info.storage_buffer_count = align_up(info.storage_buffer_count, ATOM); info.storage_buffer_dynamic_count = align_up(info.storage_buffer_dynamic_count, ATOM); info.storage_image_count = align_up(info.storage_image_count, ATOM); @@ -2845,7 +2849,7 @@ impl Resolver { let sampler = descriptor_info.sampler().map(|sampler| **sampler).unwrap_or_default(); let image_view = Image::view(image, image_view_info)?; let image_layout = match descriptor_type { - vk::DescriptorType::COMBINED_IMAGE_SAMPLER => { + vk::DescriptorType::COMBINED_IMAGE_SAMPLER | vk::DescriptorType::SAMPLED_IMAGE => { if image_view_info.aspect_mask.contains( vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL, ) { @@ -2865,7 +2869,7 @@ impl Resolver { } } vk::DescriptorType::STORAGE_IMAGE => vk::ImageLayout::GENERAL, - _ => unimplemented!(), + _ => unimplemented!("{descriptor_type:?}"), }; if binding_offset == 0 { @@ -2936,70 +2940,90 @@ impl Resolver { } } - // Write graphic render pass input attachments (they're automatic) - if exec_idx > 0 && pipeline.is_graphic() { - let pipeline = pipeline.unwrap_graphic(); - for (&DescriptorBinding(descriptor_set_idx, dst_binding), (descriptor_info, _)) in - &pipeline.descriptor_bindings - { - if let DescriptorInfo::InputAttachment(_, attachment_idx) = *descriptor_info { - let is_random_access = exec.color_stores.contains_key(&attachment_idx) - || exec.color_resolves.contains_key(&attachment_idx); - let (attachment, write_exec) = pass.execs[0..exec_idx] - .iter() - .rev() - .find_map(|exec| { - exec.color_stores.get(&attachment_idx).copied() - .map(|attachment| { - (attachment, exec) - }) - .or_else(|| { - exec.color_resolves.get(&attachment_idx) - .map( - |(resolved_attachment, _)| { - (*resolved_attachment, 
exec) - }, - ) - }) - }) - .expect("input attachment not written"); - let [_, late] = &write_exec.accesses[&attachment.target]; - let image_subresource = late.subresource.as_ref().unwrap().unwrap_image(); - let image_binding = &bindings[attachment.target]; - let image = image_binding.as_driver_image().unwrap(); - let image_view_info = ImageViewInfo { - array_layer_count: image_subresource.array_layer_count, - aspect_mask: attachment.aspect_mask, - base_array_layer: image_subresource.base_array_layer, - base_mip_level: image_subresource.base_mip_level, - fmt: attachment.format, - mip_level_count: image_subresource.mip_level_count, - ty: image.info.ty, - }; - let image_view = Image::view(image, image_view_info)?; - let sampler = descriptor_info.sampler().map(|sampler| **sampler).unwrap_or_else(vk::Sampler::null); + if let ExecutionPipeline::Graphic(pipeline) = pipeline { + for descriptor_binding @ DescriptorBinding(descriptor_set_idx, dst_binding) in pipeline.separate_samplers.iter().copied() { + tls.image_writes.push(IndexWrite { + idx: tls.image_infos.len(), + write: vk::WriteDescriptorSet { + dst_set: *descriptor_sets[descriptor_set_idx as usize], + dst_binding, + descriptor_type: vk::DescriptorType::SAMPLER, + descriptor_count: 1, + ..Default::default() + }, + } + ); + tls.image_infos.push(vk::DescriptorImageInfo { + image_layout: Default::default(), + image_view: Default::default(), + sampler: **pipeline.descriptor_bindings[&descriptor_binding].0.sampler().unwrap(), + }); + } - tls.image_writes.push(IndexWrite { - idx: tls.image_infos.len(), - write: vk::WriteDescriptorSet { - dst_set: *descriptor_sets[descriptor_set_idx as usize], - dst_binding, - descriptor_type: vk::DescriptorType::INPUT_ATTACHMENT, - descriptor_count: 1, - ..Default::default() - }, - } - ); + // Write graphic render pass input attachments (they're automatic) + if exec_idx > 0 { + for (&DescriptorBinding(descriptor_set_idx, dst_binding), (descriptor_info, _)) in + 
&pipeline.descriptor_bindings + { + if let DescriptorInfo::InputAttachment(_, attachment_idx) = *descriptor_info { + let is_random_access = exec.color_stores.contains_key(&attachment_idx) + || exec.color_resolves.contains_key(&attachment_idx); + let (attachment, write_exec) = pass.execs[0..exec_idx] + .iter() + .rev() + .find_map(|exec| { + exec.color_stores.get(&attachment_idx).copied() + .map(|attachment| { + (attachment, exec) + }) + .or_else(|| { + exec.color_resolves.get(&attachment_idx) + .map( + |(resolved_attachment, _)| { + (*resolved_attachment, exec) + }, + ) + }) + }) + .expect("input attachment not written"); + let [_, late] = &write_exec.accesses[&attachment.target]; + let image_subresource = late.subresource.as_ref().unwrap().unwrap_image(); + let image_binding = &bindings[attachment.target]; + let image = image_binding.as_driver_image().unwrap(); + let image_view_info = ImageViewInfo { + array_layer_count: image_subresource.array_layer_count, + aspect_mask: attachment.aspect_mask, + base_array_layer: image_subresource.base_array_layer, + base_mip_level: image_subresource.base_mip_level, + fmt: attachment.format, + mip_level_count: image_subresource.mip_level_count, + ty: image.info.ty, + }; + let image_view = Image::view(image, image_view_info)?; + let sampler = descriptor_info.sampler().map(|sampler| **sampler).unwrap_or_else(vk::Sampler::null); + + tls.image_writes.push(IndexWrite { + idx: tls.image_infos.len(), + write: vk::WriteDescriptorSet { + dst_set: *descriptor_sets[descriptor_set_idx as usize], + dst_binding, + descriptor_type: vk::DescriptorType::INPUT_ATTACHMENT, + descriptor_count: 1, + ..Default::default() + }, + } + ); - tls.image_infos.push(vk::DescriptorImageInfo { - image_layout: Self::attachment_layout( - attachment.aspect_mask, - is_random_access, - true, - ), - image_view, - sampler, - }); + tls.image_infos.push(vk::DescriptorImageInfo { + image_layout: Self::attachment_layout( + attachment.aspect_mask, + is_random_access, + 
true, + ), + image_view, + sampler, + }); + } } } } diff --git a/src/pool/fifo.rs b/src/pool/fifo.rs index 78e9bbe..9fff6bb 100644 --- a/src/pool/fifo.rs +++ b/src/pool/fifo.rs @@ -225,6 +225,7 @@ impl Pool for FifoPool { && item.info.combined_image_sampler_count >= info.combined_image_sampler_count && item.info.input_attachment_count >= info.input_attachment_count && item.info.sampled_image_count >= info.sampled_image_count + && item.info.sampler_count >= info.sampler_count && item.info.storage_buffer_count >= info.storage_buffer_count && item.info.storage_buffer_dynamic_count >= info.storage_buffer_dynamic_count && item.info.storage_image_count >= info.storage_image_count diff --git a/src/pool/lazy.rs b/src/pool/lazy.rs index fdf09d1..0d1dccb 100644 --- a/src/pool/lazy.rs +++ b/src/pool/lazy.rs @@ -285,6 +285,7 @@ impl Pool for LazyPool { && item.info.combined_image_sampler_count >= info.combined_image_sampler_count && item.info.input_attachment_count >= info.input_attachment_count && item.info.sampled_image_count >= info.sampled_image_count + && item.info.sampler_count >= info.sampler_count && item.info.storage_buffer_count >= info.storage_buffer_count && item.info.storage_buffer_dynamic_count >= info.storage_buffer_dynamic_count && item.info.storage_image_count >= info.storage_image_count