From 6bf281ef12b6d69292dbe792f4973c7d620ca793 Mon Sep 17 00:00:00 2001 From: iaomw Date: Fri, 8 Dec 2023 19:39:44 +0800 Subject: [PATCH 1/4] Optix 7.7 --- ...ix_7_device_impl.h => optix_device_impl.h} | 123 +- ...eption.h => optix_device_impl_exception.h} | 10 +- ....h => optix_device_impl_transformations.h} | 10 +- .../include/internal/optix_micromap_impl.h | 93 +- zenovis/xinxinoptix/include/optix.h | 8 +- zenovis/xinxinoptix/include/optix_7_device.h | 1009 ------- zenovis/xinxinoptix/include/optix_7_host.h | 993 ------- zenovis/xinxinoptix/include/optix_7_types.h | 2250 --------------- .../include/optix_denoiser_tiling.h | 29 +- zenovis/xinxinoptix/include/optix_device.h | 1090 +++++++- .../include/optix_function_table.h | 73 +- .../include/optix_function_table_definition.h | 10 +- zenovis/xinxinoptix/include/optix_host.h | 1032 ++++++- zenovis/xinxinoptix/include/optix_micromap.h | 32 +- .../xinxinoptix/include/optix_stack_size.h | 30 +- zenovis/xinxinoptix/include/optix_stubs.h | 76 +- zenovis/xinxinoptix/include/optix_types.h | 2470 ++++++++++++++++- 17 files changed, 4848 insertions(+), 4490 deletions(-) rename zenovis/xinxinoptix/include/internal/{optix_7_device_impl.h => optix_device_impl.h} (91%) rename zenovis/xinxinoptix/include/internal/{optix_7_device_impl_exception.h => optix_device_impl_exception.h} (97%) rename zenovis/xinxinoptix/include/internal/{optix_7_device_impl_transformations.h => optix_device_impl_transformations.h} (98%) delete mode 100644 zenovis/xinxinoptix/include/optix_7_device.h delete mode 100644 zenovis/xinxinoptix/include/optix_7_host.h delete mode 100644 zenovis/xinxinoptix/include/optix_7_types.h diff --git a/zenovis/xinxinoptix/include/internal/optix_7_device_impl.h b/zenovis/xinxinoptix/include/internal/optix_device_impl.h similarity index 91% rename from zenovis/xinxinoptix/include/internal/optix_7_device_impl.h rename to zenovis/xinxinoptix/include/internal/optix_device_impl.h index 5e803bb330..13ad007ba2 100644 --- 
a/zenovis/xinxinoptix/include/internal/optix_7_device_impl.h +++ b/zenovis/xinxinoptix/include/internal/optix_device_impl.h @@ -19,7 +19,7 @@ */ /** -* @file optix_7_device_impl.h +* @file optix_device_impl.h * @author NVIDIA Corporation * @brief OptiX public API * @@ -27,14 +27,14 @@ */ #if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) -#error("optix_7_device_impl.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") +#error("optix_device_impl.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") #endif -#ifndef __optix_optix_7_device_impl_h__ -#define __optix_optix_7_device_impl_h__ +#ifndef OPTIX_OPTIX_DEVICE_IMPL_H +#define OPTIX_OPTIX_DEVICE_IMPL_H -#include "internal/optix_7_device_impl_exception.h" -#include "internal/optix_7_device_impl_transformations.h" +#include "internal/optix_device_impl_exception.h" +#include "internal/optix_device_impl_transformations.h" #ifndef __CUDACC_RTC__ #include @@ -69,8 +69,9 @@ static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle "All payload parameters need to be unsigned int." ); #endif - float ox = rayOrigin.x, oy = rayOrigin.y, oz = rayOrigin.z; - float dx = rayDirection.x, dy = rayDirection.y, dz = rayDirection.z; + OptixPayloadTypeID type = OPTIX_PAYLOAD_TYPE_DEFAULT; + float ox = rayOrigin.x, oy = rayOrigin.y, oz = rayOrigin.z; + float dx = rayDirection.x, dy = rayDirection.y, dz = rayDirection.z; unsigned int p[33] = { 0, payload... 
}; int payloadSize = (int)sizeof...( Payload ); asm volatile( @@ -85,7 +86,7 @@ static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle "=r"( p[15] ), "=r"( p[16] ), "=r"( p[17] ), "=r"( p[18] ), "=r"( p[19] ), "=r"( p[20] ), "=r"( p[21] ), "=r"( p[22] ), "=r"( p[23] ), "=r"( p[24] ), "=r"( p[25] ), "=r"( p[26] ), "=r"( p[27] ), "=r"( p[28] ), "=r"( p[29] ), "=r"( p[30] ), "=r"( p[31] ), "=r"( p[32] ) - : "r"( 0 ), "l"( handle ), "f"( ox ), "f"( oy ), "f"( oz ), "f"( dx ), "f"( dy ), "f"( dz ), "f"( tmin ), + : "r"( type ), "l"( handle ), "f"( ox ), "f"( oy ), "f"( oz ), "f"( dx ), "f"( dy ), "f"( dz ), "f"( tmin ), "f"( tmax ), "f"( rayTime ), "r"( visibilityMask ), "r"( rayFlags ), "r"( SBToffset ), "r"( SBTstride ), "r"( missSBTIndex ), "r"( payloadSize ), "r"( p[1] ), "r"( p[2] ), "r"( p[3] ), "r"( p[4] ), "r"( p[5] ), "r"( p[6] ), "r"( p[7] ), "r"( p[8] ), "r"( p[9] ), "r"( p[10] ), "r"( p[11] ), "r"( p[12] ), "r"( p[13] ), @@ -94,9 +95,10 @@ static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle "r"( p[28] ), "r"( p[29] ), "r"( p[30] ), "r"( p[31] ), "r"( p[32] ) : ); unsigned int index = 1; - (void)std::initializer_list{ index, ( payload = p[index++] )... }; + (void)std::initializer_list{index, ( payload = p[index++] )...}; } + template static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID type, OptixTraversableHandle handle, @@ -116,12 +118,14 @@ static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID type, // TypePack 1 unsigned int T0 T1 T2 ... Tn-1 Tn // TypePack 2 T0 T1 T2 T3 ... Tn unsigned int static_assert( sizeof...( Payload ) <= 32, "Only up to 32 payload values are allowed." ); +#ifndef __CUDACC_RTC__ static_assert( std::is_same, optix_internal::TypePack>::value, "All payload parameters need to be unsigned int." 
); +#endif float ox = rayOrigin.x, oy = rayOrigin.y, oz = rayOrigin.z; float dx = rayDirection.x, dy = rayDirection.y, dz = rayDirection.z; - unsigned int p[33] = { 0, payload... }; + unsigned int p[33] = {0, payload...}; int payloadSize = (int)sizeof...( Payload ); asm volatile( @@ -145,9 +149,10 @@ static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID type, "r"( p[28] ), "r"( p[29] ), "r"( p[30] ), "r"( p[31] ), "r"( p[32] ) : ); unsigned int index = 1; - (void)std::initializer_list{ index, ( payload = p[index++] )... }; + (void)std::initializer_list{index, ( payload = p[index++] )...}; } + static __forceinline__ __device__ void optixSetPayload_0( unsigned int p ) { asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 0 ), "r"( p ) : ); @@ -639,6 +644,21 @@ static __forceinline__ __device__ void optixGetTriangleVertexData( OptixTraversa : ); } +static __forceinline__ __device__ void optixGetMicroTriangleVertexData( float3 data[3] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8), _optix_get_microtriangle_vertex_data, " + "();" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[1].x ), "=f"( data[1].y ), + "=f"( data[1].z ), "=f"( data[2].x ), "=f"( data[2].y ), "=f"( data[2].z ) + : ); +} +static __forceinline__ __device__ void optixGetMicroTriangleBarycentricsData( float2 data[3] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5), _optix_get_microtriangle_barycentrics_data, " + "();" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[1].x ), "=f"( data[1].y ), "=f"( data[2].x ), "=f"( data[2].y ) + : ); +} static __forceinline__ __device__ void optixGetLinearCurveVertexData( OptixTraversableHandle gas, unsigned int primIdx, @@ -662,7 +682,7 @@ static __forceinline__ __device__ void optixGetQuadraticBSplineVertexData( Optix { asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11), _optix_get_quadratic_bspline_vertex_data, " "(%12, %13, %14, %15);" - : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( 
data[0].w ), + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), "=f"( data[1].x ), "=f"( data[1].y ), "=f"( data[1].z ), "=f"( data[1].w ), "=f"( data[2].x ), "=f"( data[2].y ), "=f"( data[2].z ), "=f"( data[2].w ) : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) @@ -678,7 +698,7 @@ static __forceinline__ __device__ void optixGetCubicBSplineVertexData( OptixTrav asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), " "_optix_get_cubic_bspline_vertex_data, " "(%16, %17, %18, %19);" - : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), "=f"( data[1].x ), "=f"( data[1].y ), "=f"( data[1].z ), "=f"( data[1].w ), "=f"( data[2].x ), "=f"( data[2].y ), "=f"( data[2].z ), "=f"( data[2].w ), "=f"( data[3].x ), "=f"( data[3].y ), "=f"( data[3].z ), "=f"( data[3].w ) @@ -702,6 +722,52 @@ static __forceinline__ __device__ void optixGetCatmullRomVertexData( OptixTraver : ); } +static __forceinline__ __device__ void optixGetCubicBezierVertexData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float4 data[4] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), " + "_optix_get_cubic_bezier_vertex_data, " + "(%16, %17, %18, %19);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), "=f"( data[1].x ), + "=f"( data[1].y ), "=f"( data[1].z ), "=f"( data[1].w ), "=f"( data[2].x ), "=f"( data[2].y ), + "=f"( data[2].z ), "=f"( data[2].w ), "=f"( data[3].x ), "=f"( data[3].y ), "=f"( data[3].z ), "=f"( data[3].w ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ void optixGetRibbonVertexData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float4 data[3] ) +{ + asm( "call (%0, %1, %2, %3, %4, 
%5, %6, %7, %8, %9, %10, %11), _optix_get_ribbon_vertex_data, " + "(%12, %13, %14, %15);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), "=f"( data[1].x ), "=f"( data[1].y ), + "=f"( data[1].z ), "=f"( data[1].w ), "=f"( data[2].x ), "=f"( data[2].y ), "=f"( data[2].z ), "=f"( data[2].w ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ float3 optixGetRibbonNormal( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float2 ribbonParameters ) +{ + float3 normal; + asm( "call (%0, %1, %2), _optix_get_ribbon_normal, " + "(%3, %4, %5, %6, %7, %8);" + : "=f"( normal.x ), "=f"( normal.y ), "=f"( normal.z ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ), + "f"( ribbonParameters.x ), "f"( ribbonParameters.y ) + : ); + return normal; +} + static __forceinline__ __device__ void optixGetSphereData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, @@ -1229,9 +1295,33 @@ static __forceinline__ __device__ bool optixIsTriangleBackFaceHit() return optixGetHitKind() == OPTIX_HIT_KIND_TRIANGLE_BACK_FACE; } +static __forceinline__ __device__ bool optixIsDisplacedMicromeshTriangleHit() +{ + return optixGetPrimitiveType( optixGetHitKind() ) == OPTIX_PRIMITIVE_TYPE_DISPLACED_MICROMESH_TRIANGLE; +} + +static __forceinline__ __device__ bool optixIsDisplacedMicromeshTriangleFrontFaceHit() +{ + return optixIsDisplacedMicromeshTriangleHit() && optixIsFrontFaceHit(); +} + +static __forceinline__ __device__ bool optixIsDisplacedMicromeshTriangleBackFaceHit() +{ + return optixIsDisplacedMicromeshTriangleHit() && optixIsBackFaceHit(); +} + static __forceinline__ __device__ float optixGetCurveParameter() { - return __int_as_float( optixGetAttribute_0() ); + float f0; + asm( "call (%0), _optix_get_curve_parameter, ();" : "=f"(f0) : ); + return f0; +} + +static __forceinline__ __device__ float2 
optixGetRibbonParameters() +{ + float f0, f1; + asm( "call (%0, %1), _optix_get_ribbon_parameters, ();" : "=f"( f0 ), "=f"( f1 ) : ); + return make_float2( f0, f1 ); } static __forceinline__ __device__ float2 optixGetTriangleBarycentrics() @@ -1472,7 +1562,6 @@ static __forceinline__ __device__ ReturnT optixContinuationCall( unsigned int sb funcT call = ( funcT )( func ); return call( args... ); } -#endif static __forceinline__ __device__ uint4 optixTexFootprint2D( unsigned long long tex, unsigned int texInfo, float x, float y, unsigned int* singleMipLevel ) { @@ -1533,3 +1622,5 @@ optixTexFootprint2DLod( unsigned long long tex, unsigned int texInfo, float x, f : ); return result; } + +#endif // OPTIX_OPTIX_DEVICE_IMPL_H diff --git a/zenovis/xinxinoptix/include/internal/optix_7_device_impl_exception.h b/zenovis/xinxinoptix/include/internal/optix_device_impl_exception.h similarity index 97% rename from zenovis/xinxinoptix/include/internal/optix_7_device_impl_exception.h rename to zenovis/xinxinoptix/include/internal/optix_device_impl_exception.h index c398ef5fe5..33c585f7b2 100644 --- a/zenovis/xinxinoptix/include/internal/optix_7_device_impl_exception.h +++ b/zenovis/xinxinoptix/include/internal/optix_device_impl_exception.h @@ -19,7 +19,7 @@ */ /** -* @file optix_7_device_impl_exception.h +* @file optix_device_impl_exception.h * @author NVIDIA Corporation * @brief OptiX public API * @@ -27,11 +27,11 @@ */ #if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) -#error("optix_7_device_impl_exception.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") +#error("optix_device_impl_exception.h is an internal header file and must not be used directly. 
Please use optix_device.h or optix.h instead.") #endif -#ifndef __optix_optix_7_device_impl_exception_h__ -#define __optix_optix_7_device_impl_exception_h__ +#ifndef OPTIX_OPTIX_DEVICE_IMPL_EXCEPTION_H +#define OPTIX_OPTIX_DEVICE_IMPL_EXCEPTION_H #if !defined(__CUDACC_RTC__) #include /* for printf */ @@ -292,4 +292,4 @@ namespace optix_impl { } // namespace optix_impl -#endif +#endif // OPTIX_OPTIX_DEVICE_IMPL_EXCEPTION_H diff --git a/zenovis/xinxinoptix/include/internal/optix_7_device_impl_transformations.h b/zenovis/xinxinoptix/include/internal/optix_device_impl_transformations.h similarity index 98% rename from zenovis/xinxinoptix/include/internal/optix_7_device_impl_transformations.h rename to zenovis/xinxinoptix/include/internal/optix_device_impl_transformations.h index c2cc69add5..a6601f1bf2 100644 --- a/zenovis/xinxinoptix/include/internal/optix_7_device_impl_transformations.h +++ b/zenovis/xinxinoptix/include/internal/optix_device_impl_transformations.h @@ -19,7 +19,7 @@ */ /** -* @file optix_7_device_impl_transformations.h +* @file optix_device_impl_transformations.h * @author NVIDIA Corporation * @brief OptiX public API * @@ -27,11 +27,11 @@ */ #if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) -#error("optix_7_device_impl_transformations.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") +#error("optix_device_impl_transformations.h is an internal header file and must not be used directly. 
Please use optix_device.h or optix.h instead.") #endif -#ifndef __optix_optix_7_device_impl_transformations_h__ -#define __optix_optix_7_device_impl_transformations_h__ +#ifndef OPTIX_OPTIX_DEVICE_IMPL_TRANSFORMATIONS_H +#define OPTIX_OPTIX_DEVICE_IMPL_TRANSFORMATIONS_H namespace optix_impl { @@ -421,4 +421,4 @@ static __forceinline__ __device__ float3 optixTransformNormal( const float4& m0, } // namespace optix_impl -#endif +#endif // OPTIX_OPTIX_DEVICE_IMPL_TRANSFORMATIONS_H diff --git a/zenovis/xinxinoptix/include/internal/optix_micromap_impl.h b/zenovis/xinxinoptix/include/internal/optix_micromap_impl.h index 9de2ad0ee5..bff72be312 100644 --- a/zenovis/xinxinoptix/include/internal/optix_micromap_impl.h +++ b/zenovis/xinxinoptix/include/internal/optix_micromap_impl.h @@ -32,18 +32,18 @@ * @brief OptiX micromap helper functions */ -#ifndef __optix_optix_micromap_impl_h__ -#define __optix_optix_micromap_impl_h__ +#ifndef OPTIX_OPTIX_MICROMAP_IMPL_H +#define OPTIX_OPTIX_MICROMAP_IMPL_H #include -#if __CUDACC__ +#ifdef __CUDACC__ #include #endif #ifndef OPTIX_MICROMAP_FUNC -#if __CUDACC__ -#define OPTIX_MICROMAP_FUNC __host__ __device__ +#ifdef __CUDACC__ +#define OPTIX_MICROMAP_FUNC __device__ #else #define OPTIX_MICROMAP_FUNC #endif @@ -57,9 +57,10 @@ namespace optix_impl { #define OPTIX_MICROMAP_INLINE_FUNC OPTIX_MICROMAP_FUNC inline -#if __CUDACC__ +#ifdef __CUDACC__ // the device implementation of __uint_as_float is declared in cuda_runtime.h #else +// the host implementation of __uint_as_float OPTIX_MICROMAP_INLINE_FUNC float __uint_as_float( uint32_t x ) { union { float f; uint32_t i; } var; @@ -68,17 +69,6 @@ OPTIX_MICROMAP_INLINE_FUNC float __uint_as_float( uint32_t x ) } #endif - -// Deinterleave bits from x into even and odd halves -OPTIX_MICROMAP_INLINE_FUNC uint32_t deinterleaveBits( uint32_t x ) -{ - x = ( ( ( ( x >> 1 ) & 0x22222222u ) | ( ( x << 1 ) & ~0x22222222u ) ) & 0x66666666u ) | ( x & ~0x66666666u ); - x = ( ( ( ( x >> 2 ) & 0x0c0c0c0cu ) | ( 
( x << 2 ) & ~0x0c0c0c0cu ) ) & 0x3c3c3c3cu ) | ( x & ~0x3c3c3c3cu ); - x = ( ( ( ( x >> 4 ) & 0x00f000f0u ) | ( ( x << 4 ) & ~0x00f000f0u ) ) & 0x0ff00ff0u ) | ( x & ~0x0ff00ff0u ); - x = ( ( ( ( x >> 8 ) & 0x0000ff00u ) | ( ( x << 8 ) & ~0x0000ff00u ) ) & 0x00ffff00u ) | ( x & ~0x00ffff00u ); - return x; -} - // Extract even bits OPTIX_MICROMAP_INLINE_FUNC uint32_t extractEvenBits( uint32_t x ) { @@ -101,7 +91,6 @@ OPTIX_MICROMAP_INLINE_FUNC uint32_t prefixEor( uint32_t x ) return x; } - // Convert distance along the curve to discrete barycentrics OPTIX_MICROMAP_INLINE_FUNC void index2dbary( uint32_t index, uint32_t& u, uint32_t& v, uint32_t& w ) { @@ -118,15 +107,15 @@ OPTIX_MICROMAP_INLINE_FUNC void index2dbary( uint32_t index, uint32_t& u, uint32 w = ( ~fx & ~t ) | ( b0 & ~t ) | ( ~b0 & fx & t ); } - -// Compute barycentrics for micro triangle -OPTIX_MICROMAP_INLINE_FUNC void micro2bary( uint32_t index, uint32_t subdivisionLevel, float2& uv0, float2& uv1, float2& uv2 ) +// Compute barycentrics of a sub or micro triangle wrt a base triangle +// The order of the returned bary0, bary1, bary2 matters and allows for using this function for sub triangles and the conversion from sub triangle to base triangle barycentric space +OPTIX_MICROMAP_INLINE_FUNC void micro2bary( uint32_t index, uint32_t subdivisionLevel, float2& bary0, float2& bary1, float2& bary2 ) { if( subdivisionLevel == 0 ) { - uv0 = { 0, 0 }; - uv1 = { 1, 0 }; - uv2 = { 0, 1 }; + bary0 = { 0, 0 }; + bary1 = { 1, 0 }; + bary2 = { 0, 1 }; return; } @@ -138,12 +127,10 @@ OPTIX_MICROMAP_INLINE_FUNC void micro2bary( uint32_t index, uint32_t subdivision iv = iv & ( ( 1 << subdivisionLevel ) - 1 ); iw = iw & ( ( 1 << subdivisionLevel ) - 1 ); - bool upright = ( iu & 1 ) ^ ( iv & 1 ) ^ ( iw & 1 ); - if( !upright ) - { - iu = iu + 1; - iv = iv + 1; - } + int yFlipped = ( iu & 1 ) ^ ( iv & 1 ) ^ ( iw & 1 ) ^ 1; + + int xFlipped = ( ( 0x8888888888888888ull ^ 0xf000f000f000f000ull ^ 0xffff000000000000ull ) >> index 
) & 1; + xFlipped ^= ( ( 0x8888888888888888ull ^ 0xf000f000f000f000ull ^ 0xffff000000000000ull ) >> ( index >> 6 ) ) & 1; const float levelScale = __uint_as_float( ( 127u - subdivisionLevel ) << 23 ); @@ -155,20 +142,46 @@ OPTIX_MICROMAP_INLINE_FUNC void micro2bary( uint32_t index, uint32_t subdivision float u = (float)iu * levelScale; float v = (float)iv * levelScale; - if( !upright ) - { - du = -du; - dv = -dv; - } - - uv0 = { u, v }; - uv1 = { u + du, v }; - uv2 = { u, v + dv }; + // c d + // x-----x + // / \ / + // / \ / + // x-----x + // a b + // + // !xFlipped && !yFlipped: abc + // !xFlipped && yFlipped: cdb + // xFlipped && !yFlipped: bac + // xFlipped && yFlipped: dcb + + bary0 = { u + xFlipped * du , v + yFlipped * dv }; + bary1 = { u + (1-xFlipped) * du, v + yFlipped * dv }; + bary2 = { u + yFlipped * du , v + (1-yFlipped) * dv }; } +// avoid any conflicts due to multiple definitions +#define OPTIX_MICROMAP_FLOAT2_SUB(a,b) { a.x - b.x, a.y - b.y } + +// Compute barycentrics for micro triangle from base barycentrics +OPTIX_MICROMAP_INLINE_FUNC float2 base2micro( const float2& baseBarycentrics, const float2 microVertexBaseBarycentrics[3] ) +{ + float2 baryV0P = OPTIX_MICROMAP_FLOAT2_SUB( baseBarycentrics, microVertexBaseBarycentrics[0] ); + float2 baryV0V1 = OPTIX_MICROMAP_FLOAT2_SUB( microVertexBaseBarycentrics[1], microVertexBaseBarycentrics[0] ); + float2 baryV0V2 = OPTIX_MICROMAP_FLOAT2_SUB( microVertexBaseBarycentrics[2], microVertexBaseBarycentrics[0] ); + + float rdetA = 1.f / ( baryV0V1.x * baryV0V2.y - baryV0V1.y * baryV0V2.x ); + float4 A = { baryV0V2.y, -baryV0V2.x, -baryV0V1.y, baryV0V1.x }; + + float2 localUV; + localUV.x = rdetA * ( baryV0P.x * A.x + baryV0P.y * A.y ); + localUV.y = rdetA * ( baryV0P.x * A.z + baryV0P.y * A.w ); + + return localUV; +} +#undef OPTIX_MICROMAP_FLOAT2_SUB /*@}*/ // end group optix_utilities } // namespace optix_impl -#endif // __optix_optix_micromap_impl_h__ +#endif // OPTIX_OPTIX_MICROMAP_IMPL_H diff --git 
a/zenovis/xinxinoptix/include/optix.h b/zenovis/xinxinoptix/include/optix.h index 3690782d9d..0df3c8ef02 100644 --- a/zenovis/xinxinoptix/include/optix.h +++ b/zenovis/xinxinoptix/include/optix.h @@ -26,15 +26,15 @@ /// Includes the host api if compiling host code, includes the cuda api if compiling device code. /// For the math library routines include optix_math.h -#ifndef __optix_optix_h__ -#define __optix_optix_h__ +#ifndef OPTIX_OPTIX_H +#define OPTIX_OPTIX_H /// The OptiX version. /// /// - major = OPTIX_VERSION/10000 /// - minor = (OPTIX_VERSION%10000)/100 /// - micro = OPTIX_VERSION%100 -#define OPTIX_VERSION 70600 +#define OPTIX_VERSION 70700 #ifdef __CUDACC__ @@ -44,4 +44,4 @@ #endif -#endif // __optix_optix_h__ +#endif // OPTIX_OPTIX_H diff --git a/zenovis/xinxinoptix/include/optix_7_device.h b/zenovis/xinxinoptix/include/optix_7_device.h deleted file mode 100644 index 9c6bacd63b..0000000000 --- a/zenovis/xinxinoptix/include/optix_7_device.h +++ /dev/null @@ -1,1009 +0,0 @@ -/* -* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -* -* NVIDIA Corporation and its licensors retain all intellectual property and proprietary -* rights in and to this software, related documentation and any modifications thereto. -* Any use, reproduction, disclosure or distribution of this software and related -* documentation without an express license agreement from NVIDIA Corporation is strictly -* prohibited. -* -* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* -* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, -* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -* PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY -* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT -* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF -* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR -* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF -* SUCH DAMAGES -*/ - -/// @file -/// @author NVIDIA Corporation -/// @brief OptiX public API header -/// -/// OptiX public API Reference - Device API declarations - -#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) -#error("optix_7_device.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") -#endif - - -#ifndef __optix_optix_7_device_h__ -#define __optix_optix_7_device_h__ - -#if defined( __cplusplus ) && ( __cplusplus < 201103L ) && !defined( _WIN32 ) -#error Device code for OptiX requires at least C++11. Consider adding "--std c++11" to the nvcc command-line. -#endif - -#include "optix_7_types.h" - -/// \defgroup optix_device_api Device API -/// \brief OptiX Device API - -/** \addtogroup optix_device_api -@{ -*/ - -/// Initiates a ray tracing query starting with the given traversable. 
-/// -/// \param[in] handle -/// \param[in] rayOrigin -/// \param[in] rayDirection -/// \param[in] tmin -/// \param[in] tmax -/// \param[in] rayTime -/// \param[in] visibilityMask really only 8 bits -/// \param[in] rayFlags really only 8 bits, combination of OptixRayFlags -/// \param[in] SBToffset really only 4 bits -/// \param[in] SBTstride really only 4 bits -/// \param[in] missSBTIndex specifies the miss program invoked on a miss -/// \param[in,out] payload up to 32 unsigned int values that hold the payload -template -static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle, - float3 rayOrigin, - float3 rayDirection, - float tmin, - float tmax, - float rayTime, - OptixVisibilityMask visibilityMask, - unsigned int rayFlags, - unsigned int SBToffset, - unsigned int SBTstride, - unsigned int missSBTIndex, - Payload&... payload ); - -/// Initiates a ray tracing query starting with the given traversable. -/// -/// \param[in] type -/// \param[in] handle -/// \param[in] rayOrigin -/// \param[in] rayDirection -/// \param[in] tmin -/// \param[in] tmax -/// \param[in] rayTime -/// \param[in] visibilityMask really only 8 bits -/// \param[in] rayFlags really only 8 bits, combination of OptixRayFlags -/// \param[in] SBToffset really only 4 bits -/// \param[in] SBTstride really only 4 bits -/// \param[in] missSBTIndex specifies the miss program invoked on a miss -/// \param[in,out] payload up to 32 unsigned int values that hold the payload -template -static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID type, - OptixTraversableHandle handle, - float3 rayOrigin, - float3 rayDirection, - float tmin, - float tmax, - float rayTime, - OptixVisibilityMask visibilityMask, - unsigned int rayFlags, - unsigned int SBToffset, - unsigned int SBTstride, - unsigned int missSBTIndex, - Payload&... payload ); - -/// Writes the 32-bit payload value at slot 0. 
-static __forceinline__ __device__ void optixSetPayload_0( unsigned int p ); -/// Writes the 32-bit payload value at slot 1. -static __forceinline__ __device__ void optixSetPayload_1( unsigned int p ); -/// Writes the 32-bit payload value at slot 2. -static __forceinline__ __device__ void optixSetPayload_2( unsigned int p ); -/// Writes the 32-bit payload value at slot 3. -static __forceinline__ __device__ void optixSetPayload_3( unsigned int p ); -/// Writes the 32-bit payload value at slot 4. -static __forceinline__ __device__ void optixSetPayload_4( unsigned int p ); -/// Writes the 32-bit payload value at slot 5. -static __forceinline__ __device__ void optixSetPayload_5( unsigned int p ); -/// Writes the 32-bit payload value at slot 6. -static __forceinline__ __device__ void optixSetPayload_6( unsigned int p ); -/// Writes the 32-bit payload value at slot 7. -static __forceinline__ __device__ void optixSetPayload_7( unsigned int p ); - -/// Writes the 32-bit payload value at slot 8. -static __forceinline__ __device__ void optixSetPayload_8( unsigned int p ); -/// Writes the 32-bit payload value at slot 9. -static __forceinline__ __device__ void optixSetPayload_9( unsigned int p ); -/// Writes the 32-bit payload value at slot 10. -static __forceinline__ __device__ void optixSetPayload_10( unsigned int p ); -/// Writes the 32-bit payload value at slot 11. -static __forceinline__ __device__ void optixSetPayload_11( unsigned int p ); -/// Writes the 32-bit payload value at slot 12. -static __forceinline__ __device__ void optixSetPayload_12( unsigned int p ); -/// Writes the 32-bit payload value at slot 13. -static __forceinline__ __device__ void optixSetPayload_13( unsigned int p ); -/// Writes the 32-bit payload value at slot 14. -static __forceinline__ __device__ void optixSetPayload_14( unsigned int p ); -/// Writes the 32-bit payload value at slot 15. 
-static __forceinline__ __device__ void optixSetPayload_15( unsigned int p ); -/// Writes the 32-bit payload value at slot 16. -static __forceinline__ __device__ void optixSetPayload_16( unsigned int p ); -/// Writes the 32-bit payload value at slot 17. -static __forceinline__ __device__ void optixSetPayload_17( unsigned int p ); -/// Writes the 32-bit payload value at slot 18. -static __forceinline__ __device__ void optixSetPayload_18( unsigned int p ); -/// Writes the 32-bit payload value at slot 19. -static __forceinline__ __device__ void optixSetPayload_19( unsigned int p ); -/// Writes the 32-bit payload value at slot 20. -static __forceinline__ __device__ void optixSetPayload_20( unsigned int p ); -/// Writes the 32-bit payload value at slot 21. -static __forceinline__ __device__ void optixSetPayload_21( unsigned int p ); -/// Writes the 32-bit payload value at slot 22. -static __forceinline__ __device__ void optixSetPayload_22( unsigned int p ); -/// Writes the 32-bit payload value at slot 23. -static __forceinline__ __device__ void optixSetPayload_23( unsigned int p ); -/// Writes the 32-bit payload value at slot 24. -static __forceinline__ __device__ void optixSetPayload_24( unsigned int p ); -/// Writes the 32-bit payload value at slot 25. -static __forceinline__ __device__ void optixSetPayload_25( unsigned int p ); -/// Writes the 32-bit payload value at slot 26. -static __forceinline__ __device__ void optixSetPayload_26( unsigned int p ); -/// Writes the 32-bit payload value at slot 27. -static __forceinline__ __device__ void optixSetPayload_27( unsigned int p ); -/// Writes the 32-bit payload value at slot 28. -static __forceinline__ __device__ void optixSetPayload_28( unsigned int p ); -/// Writes the 32-bit payload value at slot 29. -static __forceinline__ __device__ void optixSetPayload_29( unsigned int p ); -/// Writes the 32-bit payload value at slot 30. 
-static __forceinline__ __device__ void optixSetPayload_30( unsigned int p ); -/// Writes the 32-bit payload value at slot 31. -static __forceinline__ __device__ void optixSetPayload_31( unsigned int p ); - -/// Reads the 32-bit payload value at slot 0. -static __forceinline__ __device__ unsigned int optixGetPayload_0(); -/// Reads the 32-bit payload value at slot 1. -static __forceinline__ __device__ unsigned int optixGetPayload_1(); -/// Reads the 32-bit payload value at slot 2. -static __forceinline__ __device__ unsigned int optixGetPayload_2(); -/// Reads the 32-bit payload value at slot 3. -static __forceinline__ __device__ unsigned int optixGetPayload_3(); -/// Reads the 32-bit payload value at slot 4. -static __forceinline__ __device__ unsigned int optixGetPayload_4(); -/// Reads the 32-bit payload value at slot 5. -static __forceinline__ __device__ unsigned int optixGetPayload_5(); -/// Reads the 32-bit payload value at slot 6. -static __forceinline__ __device__ unsigned int optixGetPayload_6(); -/// Reads the 32-bit payload value at slot 7. -static __forceinline__ __device__ unsigned int optixGetPayload_7(); - -/// Reads the 32-bit payload value at slot 8. -static __forceinline__ __device__ unsigned int optixGetPayload_8(); -/// Reads the 32-bit payload value at slot 9. -static __forceinline__ __device__ unsigned int optixGetPayload_9(); -/// Reads the 32-bit payload value at slot 10. -static __forceinline__ __device__ unsigned int optixGetPayload_10(); -/// Reads the 32-bit payload value at slot 11. -static __forceinline__ __device__ unsigned int optixGetPayload_11(); -/// Reads the 32-bit payload value at slot 12. -static __forceinline__ __device__ unsigned int optixGetPayload_12(); -/// Reads the 32-bit payload value at slot 13. -static __forceinline__ __device__ unsigned int optixGetPayload_13(); -/// Reads the 32-bit payload value at slot 14. 
-static __forceinline__ __device__ unsigned int optixGetPayload_14(); -/// Reads the 32-bit payload value at slot 15. -static __forceinline__ __device__ unsigned int optixGetPayload_15(); -/// Reads the 32-bit payload value at slot 16. -static __forceinline__ __device__ unsigned int optixGetPayload_16(); -/// Reads the 32-bit payload value at slot 17. -static __forceinline__ __device__ unsigned int optixGetPayload_17(); -/// Reads the 32-bit payload value at slot 18. -static __forceinline__ __device__ unsigned int optixGetPayload_18(); -/// Reads the 32-bit payload value at slot 19. -static __forceinline__ __device__ unsigned int optixGetPayload_19(); -/// Reads the 32-bit payload value at slot 20. -static __forceinline__ __device__ unsigned int optixGetPayload_20(); -/// Reads the 32-bit payload value at slot 21. -static __forceinline__ __device__ unsigned int optixGetPayload_21(); -/// Reads the 32-bit payload value at slot 22. -static __forceinline__ __device__ unsigned int optixGetPayload_22(); -/// Reads the 32-bit payload value at slot 23. -static __forceinline__ __device__ unsigned int optixGetPayload_23(); -/// Reads the 32-bit payload value at slot 24. -static __forceinline__ __device__ unsigned int optixGetPayload_24(); -/// Reads the 32-bit payload value at slot 25. -static __forceinline__ __device__ unsigned int optixGetPayload_25(); -/// Reads the 32-bit payload value at slot 26. -static __forceinline__ __device__ unsigned int optixGetPayload_26(); -/// Reads the 32-bit payload value at slot 27. -static __forceinline__ __device__ unsigned int optixGetPayload_27(); -/// Reads the 32-bit payload value at slot 28. -static __forceinline__ __device__ unsigned int optixGetPayload_28(); -/// Reads the 32-bit payload value at slot 29. -static __forceinline__ __device__ unsigned int optixGetPayload_29(); -/// Reads the 32-bit payload value at slot 30. 
-static __forceinline__ __device__ unsigned int optixGetPayload_30(); -/// Reads the 32-bit payload value at slot 31. -static __forceinline__ __device__ unsigned int optixGetPayload_31(); - -/// Specify the supported payload types for a program. -/// -/// The supported types are specified as a bitwise combination of payload types. (See OptixPayloadTypeID) -/// May only be called once per program. -/// Must be called at the top of the program. -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ void optixSetPayloadTypes( unsigned int typeMask ); - -/// Returns an undefined value. -static __forceinline__ __device__ unsigned int optixUndefinedValue(); - -/// Returns the rayOrigin passed into optixTrace. -/// -/// May be more expensive to call in IS and AH than their object space counterparts, -/// so effort should be made to use the object space ray in those programs. -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ float3 optixGetWorldRayOrigin(); - -/// Returns the rayDirection passed into optixTrace. -/// -/// May be more expensive to call in IS and AH than their object space counterparts, -/// so effort should be made to use the object space ray in those programs. -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ float3 optixGetWorldRayDirection(); - -/// Returns the current object space ray origin based on the current transform stack. -/// -/// Only available in IS and AH. -static __forceinline__ __device__ float3 optixGetObjectRayOrigin(); - -/// Returns the current object space ray direction based on the current transform stack. -/// -/// Only available in IS and AH. -static __forceinline__ __device__ float3 optixGetObjectRayDirection(); - -/// Returns the tmin passed into optixTrace. 
-/// -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ float optixGetRayTmin(); - -/// In IS and CH returns the current smallest reported hitT or the tmax passed into optixTrace if no hit has been reported -/// In AH returns the hitT value as passed in to optixReportIntersection -/// In MS returns the tmax passed into optixTrace -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ float optixGetRayTmax(); - -/// Returns the rayTime passed into optixTrace. -/// -/// Will return 0 if motion is disabled. -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ float optixGetRayTime(); - -/// Returns the rayFlags passed into optixTrace -/// -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ unsigned int optixGetRayFlags(); - -/// Returns the visibilityMask passed into optixTrace -/// -/// Only available in IS, AH, CH, MS -static __forceinline__ __device__ unsigned int optixGetRayVisibilityMask(); - -/// Return the traversable handle of a given instance in an Instance -/// Acceleration Structure (IAS) -static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceTraversableFromIAS( OptixTraversableHandle ias, unsigned int instIdx ); - -/// Return the object space triangle vertex positions of a given triangle in a Geometry -/// Acceleration Structure (GAS) at a given motion time. -/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. -/// -/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the -/// time parameter is ignored. -static __forceinline__ __device__ void optixGetTriangleVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float3 data[3]); - - -/// Return the object space curve control vertex data of a linear curve in a Geometry -/// Acceleration Structure (GAS) at a given motion time. 
-/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. -/// -/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. -/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the -/// time parameter is ignored. -static __forceinline__ __device__ void optixGetLinearCurveVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[2] ); - -/// Return the object space curve control vertex data of a quadratic BSpline curve in a Geometry -/// Acceleration Structure (GAS) at a given motion time. -/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. -/// -/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. -/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the -/// time parameter is ignored. -static __forceinline__ __device__ void optixGetQuadraticBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[3] ); - -/// Return the object space curve control vertex data of a cubic BSpline curve in a Geometry -/// Acceleration Structure (GAS) at a given motion time. -/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. -/// -/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. -/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the -/// time parameter is ignored. 
-static __forceinline__ __device__ void optixGetCubicBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); - -/// Return the object space curve control vertex data of a CatmullRom spline curve in a Geometry -/// Acceleration Structure (GAS) at a given motion time. -/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. -/// -/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. -/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the -/// time parameter is ignored. -static __forceinline__ __device__ void optixGetCatmullRomVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); - -/// Return the object space sphere data, center point and radius, in a Geometry Acceleration Structure (GAS) at a given motion time. -/// To access sphere data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. -/// -/// data[0] = {x,y,z,w} with {x,y,z} the position of the sphere center and w the radius. -/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the -/// time parameter is ignored. -static __forceinline__ __device__ void optixGetSphereData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[1] ); - -/// Returns the traversable handle for the Geometry Acceleration Structure (GAS) containing -/// the current hit. May be called from IS, AH and CH. 
-static __forceinline__ __device__ OptixTraversableHandle optixGetGASTraversableHandle(); - -/// Returns the motion begin time of a GAS (see OptixMotionOptions) -static __forceinline__ __device__ float optixGetGASMotionTimeBegin( OptixTraversableHandle gas ); - -/// Returns the motion end time of a GAS (see OptixMotionOptions) -static __forceinline__ __device__ float optixGetGASMotionTimeEnd( OptixTraversableHandle gas ); - -/// Returns the number of motion steps of a GAS (see OptixMotionOptions) -static __forceinline__ __device__ unsigned int optixGetGASMotionStepCount( OptixTraversableHandle gas ); - -/// Returns the world-to-object transformation matrix resulting from the current active transformation list. -/// -/// The cost of this function may be proportional to the size of the transformation list. -static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float m[12] ); - -/// Returns the object-to-world transformation matrix resulting from the current active transformation list. -/// -/// The cost of this function may be proportional to the size of the transformation list. -static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float m[12] ); - -/// Transforms the point using world-to-object transformation matrix resulting from the current active transformation -/// list. -/// -/// The cost of this function may be proportional to the size of the transformation list. -static __forceinline__ __device__ float3 optixTransformPointFromWorldToObjectSpace( float3 point ); - -/// Transforms the vector using world-to-object transformation matrix resulting from the current active transformation -/// list. -/// -/// The cost of this function may be proportional to the size of the transformation list. 
-static __forceinline__ __device__ float3 optixTransformVectorFromWorldToObjectSpace( float3 vec ); - -/// Transforms the normal using world-to-object transformation matrix resulting from the current active transformation -/// list. -/// -/// The cost of this function may be proportional to the size of the transformation list. -static __forceinline__ __device__ float3 optixTransformNormalFromWorldToObjectSpace( float3 normal ); - -/// Transforms the point using object-to-world transformation matrix resulting from the current active transformation -/// list. -/// -/// The cost of this function may be proportional to the size of the transformation list. -static __forceinline__ __device__ float3 optixTransformPointFromObjectToWorldSpace( float3 point ); - -/// Transforms the vector using object-to-world transformation matrix resulting from the current active transformation -/// list. -/// -/// The cost of this function may be proportional to the size of the transformation list. -static __forceinline__ __device__ float3 optixTransformVectorFromObjectToWorldSpace( float3 vec ); - -/// Transforms the normal using object-to-world transformation matrix resulting from the current active transformation -/// list. -/// -/// The cost of this function may be proportional to the size of the transformation list. -static __forceinline__ __device__ float3 optixTransformNormalFromObjectToWorldSpace( float3 normal ); - -/// Returns the number of transforms on the current transform list. -/// -/// Only available in IS, AH, CH, EX -static __forceinline__ __device__ unsigned int optixGetTransformListSize(); - -/// Returns the traversable handle for a transform on the current transform list. -/// -/// Only available in IS, AH, CH, EX -static __forceinline__ __device__ OptixTraversableHandle optixGetTransformListHandle( unsigned int index ); - - -/// Returns the transform type of a traversable handle from a transform list. 
-static __forceinline__ __device__ OptixTransformType optixGetTransformTypeFromHandle( OptixTraversableHandle handle ); - -/// Returns a pointer to a OptixStaticTransform from its traversable handle. -/// -/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM. -static __forceinline__ __device__ const OptixStaticTransform* optixGetStaticTransformFromHandle( OptixTraversableHandle handle ); - -/// Returns a pointer to a OptixSRTMotionTransform from its traversable handle. -/// -/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM. -static __forceinline__ __device__ const OptixSRTMotionTransform* optixGetSRTMotionTransformFromHandle( OptixTraversableHandle handle ); - -/// Returns a pointer to a OptixMatrixMotionTransform from its traversable handle. -/// -/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM. -static __forceinline__ __device__ const OptixMatrixMotionTransform* optixGetMatrixMotionTransformFromHandle( OptixTraversableHandle handle ); - -/// Returns instanceId from an OptixInstance traversable. -/// -/// Returns 0 if the traversable handle does not reference an OptixInstance. -static __forceinline__ __device__ unsigned int optixGetInstanceIdFromHandle( OptixTraversableHandle handle ); - -/// Returns child traversable handle from an OptixInstance traversable. -/// -/// Returns 0 if the traversable handle does not reference an OptixInstance. -static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceChildFromHandle( OptixTraversableHandle handle ); - -/// Returns object-to-world transform from an OptixInstance traversable. -/// -/// Returns 0 if the traversable handle does not reference an OptixInstance. -static __forceinline__ __device__ const float4* optixGetInstanceTransformFromHandle( OptixTraversableHandle handle ); - -/// Returns world-to-object transform from an OptixInstance traversable. 
-/// -/// Returns 0 if the traversable handle does not reference an OptixInstance. -static __forceinline__ __device__ const float4* optixGetInstanceInverseTransformFromHandle( OptixTraversableHandle handle ); - -/// Reports an intersections (overload without attributes). -/// -/// If optixGetRayTmin() <= hitT <= optixGetRayTmax(), the any hit program associated with this intersection program (via the SBT entry) is called. -/// The AH program can do one of three things: -/// 1. call optixIgnoreIntersection - no hit is recorded, optixReportIntersection returns false -/// 2. call optixTerminateRay - hit is recorded, optixReportIntersection does not return, no further traversal occurs, -/// and the associated closest hit program is called -/// 3. neither - hit is recorded, optixReportIntersection returns true -/// hitKind - Only the 7 least significant bits should be written [0..127]. Any values above 127 are reserved for built in intersection. The value can be queried with optixGetHitKind() in AH and CH. -/// -/// The attributes specified with a0..a7 are available in the AH and CH programs. -/// Note that the attributes available in the CH program correspond to the closest recorded intersection. -/// The number of attributes in registers and memory can be configured in the pipeline. -/// -/// \param[in] hitT -/// \param[in] hitKind -static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind ); - -/// Reports an intersection (overload with 1 attribute register). -/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0 ); - -/// Reports an intersection (overload with 2 attribute registers). 
-/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1 ); - -/// Reports an intersection (overload with 3 attribute registers). -/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1, unsigned int a2 ); - -/// Reports an intersection (overload with 4 attribute registers). -/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, - unsigned int hitKind, - unsigned int a0, - unsigned int a1, - unsigned int a2, - unsigned int a3 ); - -/// Reports an intersection (overload with 5 attribute registers). -/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, - unsigned int hitKind, - unsigned int a0, - unsigned int a1, - unsigned int a2, - unsigned int a3, - unsigned int a4 ); - -/// Reports an intersection (overload with 6 attribute registers). -/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, - unsigned int hitKind, - unsigned int a0, - unsigned int a1, - unsigned int a2, - unsigned int a3, - unsigned int a4, - unsigned int a5 ); - -/// Reports an intersection (overload with 7 attribute registers). -/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, - unsigned int hitKind, - unsigned int a0, - unsigned int a1, - unsigned int a2, - unsigned int a3, - unsigned int a4, - unsigned int a5, - unsigned int a6 ); - -/// Reports an intersection (overload with 8 attribute registers). 
-/// -/// \see #optixReportIntersection(float,unsigned int) -static __forceinline__ __device__ bool optixReportIntersection( float hitT, - unsigned int hitKind, - unsigned int a0, - unsigned int a1, - unsigned int a2, - unsigned int a3, - unsigned int a4, - unsigned int a5, - unsigned int a6, - unsigned int a7 ); - -/// Returns the attribute at slot 0. -static __forceinline__ __device__ unsigned int optixGetAttribute_0(); -/// Returns the attribute at slot 1. -static __forceinline__ __device__ unsigned int optixGetAttribute_1(); -/// Returns the attribute at slot 2. -static __forceinline__ __device__ unsigned int optixGetAttribute_2(); -/// Returns the attribute at slot 3. -static __forceinline__ __device__ unsigned int optixGetAttribute_3(); -/// Returns the attribute at slot 4. -static __forceinline__ __device__ unsigned int optixGetAttribute_4(); -/// Returns the attribute at slot 5. -static __forceinline__ __device__ unsigned int optixGetAttribute_5(); -/// Returns the attribute at slot 6. -static __forceinline__ __device__ unsigned int optixGetAttribute_6(); -/// Returns the attribute at slot 7. -static __forceinline__ __device__ unsigned int optixGetAttribute_7(); - -/// Record the hit, stops traversal, and proceeds to CH. -/// -/// Available only in AH. -static __forceinline__ __device__ void optixTerminateRay(); - -/// Discards the hit, and returns control to the calling optixReportIntersection or built-in intersection routine. -/// -/// Available only in AH. -static __forceinline__ __device__ void optixIgnoreIntersection(); - - -/// For a given OptixBuildInputTriangleArray the number of primitives is defined as -/// "(OptixBuildInputTriangleArray::indexBuffer == 0) ? OptixBuildInputTriangleArray::numVertices/3 : -/// OptixBuildInputTriangleArray::numIndexTriplets;". -/// For a given OptixBuildInputCustomPrimitiveArray the number of primitives is defined as -/// numAabbs. 
-/// -/// The primitive index returns the index into the array of primitives -/// plus the primitiveIndexOffset. -/// -/// In IS and AH this corresponds to the currently intersected primitive. -/// In CH this corresponds to the primitive index of the closest intersected primitive. -static __forceinline__ __device__ unsigned int optixGetPrimitiveIndex(); - -/// Returns the Sbt GAS index of the primitive associated with the current intersection. -/// -/// In IS and AH this corresponds to the currently intersected primitive. -/// In CH this corresponds to the Sbt GAS index of the closest intersected primitive. -/// In EX with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT corresponds to the sbt index within the hit GAS. Returns zero for all other exceptions. -static __forceinline__ __device__ unsigned int optixGetSbtGASIndex(); - - -/// Returns the OptixInstance::instanceId of the instance within the top level acceleration structure associated with the current intersection. -/// -/// When building an acceleration structure using OptixBuildInputInstanceArray each OptixInstance has a user supplied instanceId. -/// OptixInstance objects reference another acceleration structure. During traversal the acceleration structures are visited top down. -/// In the IS and AH programs the OptixInstance::instanceId corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceId(). -/// In CH optixGetInstanceId() returns the OptixInstance::instanceId when the hit was recorded with optixReportIntersection. -/// In the case where there is no OptixInstance visited, optixGetInstanceId returns ~0u -static __forceinline__ __device__ unsigned int optixGetInstanceId(); - -/// Returns the zero-based index of the instance within its instance acceleration structure associated with the current intersection. 
-/// -/// In the IS and AH programs the index corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceIndex(). -/// In CH optixGetInstanceIndex() returns the index when the hit was recorded with optixReportIntersection. -/// In the case where there is no OptixInstance visited, optixGetInstanceIndex returns 0 -static __forceinline__ __device__ unsigned int optixGetInstanceIndex(); - -/// Returns the 8 bit hit kind associated with the current hit. -/// -/// Use optixGetPrimitiveType() to interpret the hit kind. -/// For custom intersections (primitive type OPTIX_PRIMITIVE_TYPE_CUSTOM), -/// this is the 7-bit hitKind passed to optixReportIntersection(). -/// Hit kinds greater than 127 are reserved for built-in primitives. -/// -/// Available only in AH and CH. -static __forceinline__ __device__ unsigned int optixGetHitKind(); - -/// Function interpreting the result of #optixGetHitKind(). -static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType( unsigned int hitKind ); - -/// Function interpreting the result of #optixGetHitKind(). -static __forceinline__ __device__ bool optixIsFrontFaceHit( unsigned int hitKind ); - -/// Function interpreting the result of #optixGetHitKind(). -static __forceinline__ __device__ bool optixIsBackFaceHit( unsigned int hitKind ); - -/// Function interpreting the hit kind associated with the current optixReportIntersection. -static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType(); - -/// Function interpreting the hit kind associated with the current optixReportIntersection. -static __forceinline__ __device__ bool optixIsFrontFaceHit(); - -/// Function interpreting the hit kind associated with the current optixReportIntersection. -static __forceinline__ __device__ bool optixIsBackFaceHit(); - -/// Convenience function interpreting the result of #optixGetHitKind(). 
-static __forceinline__ __device__ bool optixIsTriangleHit(); - -/// Convenience function interpreting the result of #optixGetHitKind(). -static __forceinline__ __device__ bool optixIsTriangleFrontFaceHit(); - -/// Convenience function interpreting the result of #optixGetHitKind(). -static __forceinline__ __device__ bool optixIsTriangleBackFaceHit(); - -/// Convenience function that returns the first two attributes as floats. -/// -/// When using OptixBuildInputTriangleArray objects, during intersection the barycentric -/// coordinates are stored into the first two attribute registers. -static __forceinline__ __device__ float2 optixGetTriangleBarycentrics(); - -/// Convenience function that returns the curve parameter. -/// -/// When using OptixBuildInputCurveArray objects, during intersection the curve parameter -/// is stored into the first attribute register. -static __forceinline__ __device__ float optixGetCurveParameter(); - -/// Available in any program, it returns the current launch index within the launch dimensions specified by optixLaunch on the host. -/// -/// The raygen program is typically only launched once per launch index. -static __forceinline__ __device__ uint3 optixGetLaunchIndex(); - -/// Available in any program, it returns the dimensions of the current launch specified by optixLaunch on the host. -static __forceinline__ __device__ uint3 optixGetLaunchDimensions(); - -/// Returns the generic memory space pointer to the data region (past the header) of the currently active SBT record corresponding to the current program. -static __forceinline__ __device__ CUdeviceptr optixGetSbtDataPointer(); - -/// Throws a user exception with the given exception code (overload without exception details). -/// -/// The exception code must be in the range from 0 to 2^30 - 1. Up to 8 optional exception details can be passed. They -/// can be queried in the EX program using optixGetExceptionDetail_0() to ..._8(). 
-/// -/// The exception details must not be used to encode pointers to the stack since the current stack is not preserved in -/// the EX program. -/// -/// Not available in EX. -/// -/// \param[in] exceptionCode The exception code to be thrown. -static __forceinline__ __device__ void optixThrowException( int exceptionCode ); - -/// Throws a user exception with the given exception code (overload with 1 exception detail). -/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0 ); - -/// Throws a user exception with the given exception code (overload with 2 exception details). -/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, - unsigned int exceptionDetail0, - unsigned int exceptionDetail1 ); - -/// Throws a user exception with the given exception code (overload with 3 exception details). -/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, - unsigned int exceptionDetail0, - unsigned int exceptionDetail1, - unsigned int exceptionDetail2 ); - -/// Throws a user exception with the given exception code (overload with 4 exception details). -/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, - unsigned int exceptionDetail0, - unsigned int exceptionDetail1, - unsigned int exceptionDetail2, - unsigned int exceptionDetail3 ); - -/// Throws a user exception with the given exception code (overload with 5 exception details). 
-/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, - unsigned int exceptionDetail0, - unsigned int exceptionDetail1, - unsigned int exceptionDetail2, - unsigned int exceptionDetail3, - unsigned int exceptionDetail4 ); - -/// Throws a user exception with the given exception code (overload with 6 exception details). -/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, - unsigned int exceptionDetail0, - unsigned int exceptionDetail1, - unsigned int exceptionDetail2, - unsigned int exceptionDetail3, - unsigned int exceptionDetail4, - unsigned int exceptionDetail5 ); - -/// Throws a user exception with the given exception code (overload with 7 exception details). -/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, - unsigned int exceptionDetail0, - unsigned int exceptionDetail1, - unsigned int exceptionDetail2, - unsigned int exceptionDetail3, - unsigned int exceptionDetail4, - unsigned int exceptionDetail5, - unsigned int exceptionDetail6 ); - -/// Throws a user exception with the given exception code (overload with 8 exception details). -/// -/// \see #optixThrowException(int) -static __forceinline__ __device__ void optixThrowException( int exceptionCode, - unsigned int exceptionDetail0, - unsigned int exceptionDetail1, - unsigned int exceptionDetail2, - unsigned int exceptionDetail3, - unsigned int exceptionDetail4, - unsigned int exceptionDetail5, - unsigned int exceptionDetail6, - unsigned int exceptionDetail7 ); - -/// Returns the exception code. -/// -/// Only available in EX. -static __forceinline__ __device__ int optixGetExceptionCode(); - -/// Returns the 32-bit exception detail at slot 0. 
-/// -/// The behavior is undefined if the exception is not a user exception, or the used overload #optixThrowException() did -/// not provide the queried exception detail. -/// -/// Only available in EX. -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_0(); - -/// Returns the 32-bit exception detail at slot 1. -/// -/// \see #optixGetExceptionDetail_0() -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_1(); - -/// Returns the 32-bit exception detail at slot 2. -/// -/// \see #optixGetExceptionDetail_0() -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_2(); - -/// Returns the 32-bit exception detail at slot 3. -/// -/// \see #optixGetExceptionDetail_0() -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_3(); - -/// Returns the 32-bit exception detail at slot 4. -/// -/// \see #optixGetExceptionDetail_0() -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_4(); - -/// Returns the 32-bit exception detail at slot 5. -/// -/// \see #optixGetExceptionDetail_0() -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_5(); - -/// Returns the 32-bit exception detail at slot 6. -/// -/// \see #optixGetExceptionDetail_0() -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_6(); - -/// Returns the 32-bit exception detail at slot 7. -/// -/// \see #optixGetExceptionDetail_0() -static __forceinline__ __device__ unsigned int optixGetExceptionDetail_7(); - -/// Returns the invalid traversable handle for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE. -/// -/// Returns zero for all other exception codes. -/// -/// Only available in EX. -static __forceinline__ __device__ OptixTraversableHandle optixGetExceptionInvalidTraversable(); - -/// Returns the invalid sbt offset for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT and OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT. 
-/// -/// Returns zero for all other exception codes. -/// -/// Only available in EX. -static __forceinline__ __device__ int optixGetExceptionInvalidSbtOffset(); - -/// Returns the invalid ray for exceptions with exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. -/// Exceptions of type OPTIX_EXCEPTION_CODE_INVALID_RAY are thrown when one or more values that were -/// passed into optixTrace are either inf or nan. -/// -/// OptixInvalidRayExceptionDetails::rayTime will always be 0 if OptixPipelineCompileOptions::usesMotionBlur is 0. -/// Values in the returned struct are all zero for all other exception codes. -/// -/// Only available in EX. -static __forceinline__ __device__ OptixInvalidRayExceptionDetails optixGetExceptionInvalidRay(); - -/// Returns information about an exception with code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH. -/// -/// Exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH are called when the number of -/// arguments that were passed into a call to optixDirectCall or optixContinuationCall does not match -/// the number of parameters of the callable that is called. -/// Note that the parameters are packed by OptiX into individual 32 bit values, so the number of -/// expected and passed values may not correspond to the number of arguments passed into optixDirectCall -/// or optixContinuationCall. -/// -/// Values in the returned struct are all zero for all other exception codes. -/// -/// Only available in EX. -static __forceinline__ __device__ OptixParameterMismatchExceptionDetails optixGetExceptionParameterMismatch(); - -/// Returns a string that includes information about the source location that caused the current exception. -/// -/// The source location is only available for exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH, -/// OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE, OPTIX_EXCEPTION_CODE_INVALID_RAY, and for user exceptions. 
-/// Line information needs to be present in the input PTX and OptixModuleCompileOptions::debugLevel -/// may not be set to OPTIX_COMPILE_DEBUG_LEVEL_NONE. -/// -/// Returns a NULL pointer if no line information is available. -/// -/// Only available in EX. -static __forceinline__ __device__ char* optixGetExceptionLineInfo(); - -/// Creates a call to the direct callable program at the specified SBT entry. -/// -/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameDC in the -/// module specified by OptixProgramGroupCallables::moduleDC. -/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). -/// -/// Behavior is undefined if there is no direct callable program at the specified SBT entry. -/// -/// Behavior is undefined if the number of arguments that are being passed in does not match the number of -/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH -/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. -/// -/// \param[in] sbtIndex The offset of the SBT entry of the direct callable program to call relative to OptixShaderBindingTable::callablesRecordBase. -/// \param[in] args The arguments to pass to the direct callable program. -template -static __forceinline__ __device__ ReturnT optixDirectCall( unsigned int sbtIndex, ArgTypes... args ); - - -/// Creates a call to the continuation callable program at the specified SBT entry. -/// -/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameCC in the -/// module specified by OptixProgramGroupCallables::moduleCC. 
-/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). -/// As opposed to direct callable programs, continuation callable programs are allowed to call optixTrace recursively. -/// -/// Behavior is undefined if there is no continuation callable program at the specified SBT entry. -/// -/// Behavior is undefined if the number of arguments that are being passed in does not match the number of -/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH -/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. -/// -/// \param[in] sbtIndex The offset of the SBT entry of the continuation callable program to call relative to OptixShaderBindingTable::callablesRecordBase. -/// \param[in] args The arguments to pass to the continuation callable program. -template -static __forceinline__ __device__ ReturnT optixContinuationCall( unsigned int sbtIndex, ArgTypes... args ); - - -/// optixTexFootprint2D calculates the footprint of a corresponding 2D texture fetch (non-mipmapped). -/// -/// On Turing and subsequent architectures, a texture footprint instruction allows user programs to -/// determine the set of texels that would be accessed by an equivalent filtered texture lookup. -/// -/// \param[in] tex CUDA texture object (cast to 64-bit integer) -/// \param[in] texInfo Texture info packed into 32-bit integer, described below. -/// \param[in] x Texture coordinate -/// \param[in] y Texture coordinate -/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. 
-/// -/// The texture info argument is a packed 32-bit integer with the following layout: -/// -/// texInfo[31:29] = reserved (3 bits) -/// texInfo[28:24] = miplevel count (5 bits) -/// texInfo[23:20] = log2 of tile width (4 bits) -/// texInfo[19:16] = log2 of tile height (4 bits) -/// texInfo[15:10] = reserved (6 bits) -/// texInfo[9:8] = horizontal wrap mode (2 bits) (CUaddress_mode) -/// texInfo[7:6] = vertical wrap mode (2 bits) (CUaddress_mode) -/// texInfo[5] = mipmap filter mode (1 bit) (CUfilter_mode) -/// texInfo[4:0] = maximum anisotropy (5 bits) -/// -/// Returns a 16-byte structure (as a uint4) that stores the footprint of a texture request at a -/// particular "granularity", which has the following layout: -/// -/// struct Texture2DFootprint -/// { -/// unsigned long long mask; -/// unsigned int tileY : 12; -/// unsigned int reserved1 : 4; -/// unsigned int dx : 3; -/// unsigned int dy : 3; -/// unsigned int reserved2 : 2; -/// unsigned int granularity : 4; -/// unsigned int reserved3 : 4; -/// unsigned int tileX : 12; -/// unsigned int level : 4; -/// unsigned int reserved4 : 16; -/// }; -/// -/// The granularity indicates the size of texel groups that are represented by an 8x8 bitmask. For -/// example, a granularity of 12 indicates texel groups that are 128x64 texels in size. In a -/// footprint call, The returned granularity will either be the actual granularity of the result, or -/// 0 if the footprint call was able to honor the requested granularity (the usual case). -/// -/// level is the mip level of the returned footprint. Two footprint calls are needed to get the -/// complete footprint when a texture call spans multiple mip levels. -/// -/// mask is an 8x8 bitmask of texel groups that are covered, or partially covered, by the footprint. -/// tileX and tileY give the starting position of the mask in 8x8 texel-group blocks. For example, -/// suppose a granularity of 12 (128x64 texels), and tileX=3 and tileY=4. 
In this case, bit 0 of the -/// mask (the low order bit) corresponds to texel group coordinates (3*8, 4*8), and texel -/// coordinates (3*8*128, 4*8*64), within the specified mip level. -/// -/// If nonzero, dx and dy specify a "toroidal rotation" of the bitmask. Toroidal rotation of a -/// coordinate in the mask simply means that its value is reduced by 8. Continuing the example from -/// above, if dx=0 and dy=0 the mask covers texel groups (3*8, 4*8) to (3*8+7, 4*8+7) inclusive. -/// If, on the other hand, dx=2, the rightmost 2 columns in the mask have their x coordinates -/// reduced by 8, and similarly for dy. -/// -/// See the OptiX SDK for sample code that illustrates how to unpack the result. -static __forceinline__ __device__ uint4 optixTexFootprint2D( unsigned long long tex, unsigned int texInfo, float x, float y, unsigned int* singleMipLevel ); - -/// optixTexFootprint2DLod calculates the footprint of a corresponding 2D texture fetch (tex2DLod) -/// \param[in] tex CUDA texture object (cast to 64-bit integer) -/// \param[in] texInfo Texture info packed into 32-bit integer, described below. -/// \param[in] x Texture coordinate -/// \param[in] y Texture coordinate -/// \param[in] level Level of detail (lod) -/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. -/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. -/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) -static __forceinline__ __device__ uint4 -optixTexFootprint2DLod( unsigned long long tex, unsigned int texInfo, float x, float y, float level, bool coarse, unsigned int* singleMipLevel ); - -/// optixTexFootprint2DGrad calculates the footprint of a corresponding 2D texture fetch (tex2DGrad) -/// \param[in] tex CUDA texture object (cast to 64-bit integer) -/// \param[in] texInfo Texture info packed into 32-bit integer, described below. 
-/// \param[in] x Texture coordinate -/// \param[in] y Texture coordinate -/// \param[in] dPdx_x Derivative of x coordinte, which determines level of detail. -/// \param[in] dPdx_y Derivative of x coordinte, which determines level of detail. -/// \param[in] dPdy_x Derivative of y coordinte, which determines level of detail. -/// \param[in] dPdy_y Derivative of y coordinte, which determines level of detail. -/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. -/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. -/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) -static __forceinline__ __device__ uint4 optixTexFootprint2DGrad( unsigned long long tex, - unsigned int texInfo, - float x, - float y, - float dPdx_x, - float dPdx_y, - float dPdy_x, - float dPdy_y, - bool coarse, - unsigned int* singleMipLevel ); - -/*@}*/ // end group optix_device_api - -#include "internal/optix_7_device_impl.h" - -#endif // __optix_optix_7_device_h__ diff --git a/zenovis/xinxinoptix/include/optix_7_host.h b/zenovis/xinxinoptix/include/optix_7_host.h deleted file mode 100644 index abafbab80f..0000000000 --- a/zenovis/xinxinoptix/include/optix_7_host.h +++ /dev/null @@ -1,993 +0,0 @@ -/* - * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. - * - * NVIDIA Corporation and its licensors retain all intellectual property and proprietary - * rights in and to this software, related documentation and any modifications thereto. - * Any use, reproduction, disclosure or distribution of this software and related - * documentation without an express license agreement from NVIDIA Corporation is strictly - * prohibited. 
- * - * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* - * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, - * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY - * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT - * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF - * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR - * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGES - */ - -/// @file -/// @author NVIDIA Corporation -/// @brief OptiX public API header -/// -/// OptiX host include file -- includes the host api if compiling host code. -/// For the math library routines include optix_math.h - -#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) -#error("optix_7_host.h is an internal header file and must not be used directly. Please use optix_host.h or optix.h instead.") -#endif - -#ifndef __optix_optix_7_host_h__ -#define __optix_optix_7_host_h__ - -#include "optix_7_types.h" -#if !defined( OPTIX_DONT_INCLUDE_CUDA ) -// If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver types must be defined through other -// means before including optix headers. -#include -#endif - - - -#ifdef __cplusplus -extern "C" { -#endif - -/// \defgroup optix_host_api Host API -/// \brief OptiX Host API - -/// \defgroup optix_host_api_error_handling Error handling -/// \ingroup optix_host_api -//@{ - -/// Returns a string containing the name of an error code in the enum. -/// -/// Output is a string representation of the enum. For example "OPTIX_SUCCESS" for -/// OPTIX_SUCCESS and "OPTIX_ERROR_INVALID_VALUE" for OPTIX_ERROR_INVALID_VALUE. -/// -/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. 
-/// -/// \param[in] result OptixResult enum to generate string name for -/// -/// \see #optixGetErrorString -const char* optixGetErrorName( OptixResult result ); - -/// Returns the description string for an error code. -/// -/// Output is a string description of the enum. For example "Success" for -/// OPTIX_SUCCESS and "Invalid value" for OPTIX_ERROR_INVALID_VALUE. -/// -/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. -/// -/// \param[in] result OptixResult enum to generate string description for -/// -/// \see #optixGetErrorName -const char* optixGetErrorString( OptixResult result ); - -//@} -/// \defgroup optix_host_api_device_context Device context -/// \ingroup optix_host_api -//@{ - -/// Create a device context associated with the CUDA context specified with 'fromContext'. -/// -/// If zero is specified for 'fromContext', OptiX will use the current CUDA context. The -/// CUDA context should be initialized before calling optixDeviceContextCreate. -/// -/// \param[in] fromContext -/// \param[in] options -/// \param[out] context -/// \return -/// - OPTIX_ERROR_CUDA_NOT_INITIALIZED -/// If using zero for 'fromContext' and CUDA has not been initialized yet on the calling -/// thread. -/// - OPTIX_ERROR_CUDA_ERROR -/// CUDA operation failed. -/// - OPTIX_ERROR_HOST_OUT_OF_MEMORY -/// Heap allocation failed. -/// - OPTIX_ERROR_INTERNAL_ERROR -/// Internal error -OptixResult optixDeviceContextCreate( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context ); - -/// Destroys all CPU and GPU state associated with the device. -/// -/// It will attempt to block on CUDA streams that have launch work outstanding. -/// -/// Any API objects, such as OptixModule and OptixPipeline, not already destroyed will be -/// destroyed. -/// -/// Thread safety: A device context must not be destroyed while it is still in use by concurrent API calls in other threads. 
-OptixResult optixDeviceContextDestroy( OptixDeviceContext context ); - -/// Query properties of a device context. -/// -/// \param[in] context the device context to query the property for -/// \param[in] property the property to query -/// \param[out] value pointer to the returned -/// \param[in] sizeInBytes size of output -OptixResult optixDeviceContextGetProperty( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes ); - -/// Sets the current log callback method. -/// -/// See #OptixLogCallback for more details. -/// -/// Thread safety: It is guaranteed that the callback itself (callbackFunction and callbackData) are updated atomically. -/// It is not guaranteed that the callback itself (callbackFunction and callbackData) and the callbackLevel are updated -/// atomically. It is unspecified when concurrent API calls using the same context start to make use of the new -/// callback method. -/// -/// \param[in] context the device context -/// \param[in] callbackFunction the callback function to call -/// \param[in] callbackData pointer to data passed to callback function while invoking it -/// \param[in] callbackLevel callback level -OptixResult optixDeviceContextSetLogCallback( OptixDeviceContext context, - OptixLogCallback callbackFunction, - void* callbackData, - unsigned int callbackLevel ); - -/// Enables or disables the disk cache. -/// -/// If caching was previously disabled, enabling it will attempt to initialize -/// the disk cache database using the currently configured cache location. An -/// error will be returned if initialization fails. -/// -/// Note that no in-memory cache is used, so no caching behavior will be observed if the disk cache -/// is disabled. -/// -/// The cache can be disabled by setting the environment variable OPTIX_CACHE_MAXSIZE=0. -/// The environment variable takes precedence over this setting. -/// See #optixDeviceContextSetCacheDatabaseSizes for additional information. 
-/// -/// Note that the disk cache can be disabled by the environment variable, but it cannot be enabled -/// via the environment if it is disabled via the API. -/// -/// \param[in] context the device context -/// \param[in] enabled 1 to enabled, 0 to disable -OptixResult optixDeviceContextSetCacheEnabled( OptixDeviceContext context, - int enabled ); - -/// Sets the location of the disk cache. -/// -/// The location is specified by a directory. This directory should not be used for other purposes -/// and will be created if it does not exist. An error will be returned if is not possible to -/// create the disk cache at the specified location for any reason (e.g., the path is invalid or -/// the directory is not writable). Caching will be disabled if the disk cache cannot be -/// initialized in the new location. If caching is disabled, no error will be returned until caching -/// is enabled. If the disk cache is located on a network file share, behavior is undefined. -/// -/// The location of the disk cache can be overridden with the environment variable OPTIX_CACHE_PATH. -/// The environment variable takes precedence over this setting. -/// -/// The default location depends on the operating system: -/// - Windows: %LOCALAPPDATA%\\NVIDIA\\OptixCache -/// - Linux: /var/tmp/OptixCache_\ (or /tmp/OptixCache_\ if the first choice is not usable), -/// the underscore and username suffix are omitted if the username cannot be obtained -/// - MacOS X: /Library/Application Support/NVIDIA/OptixCache -/// -/// \param[in] context the device context -/// \param[in] location directory of disk cache -OptixResult optixDeviceContextSetCacheLocation( OptixDeviceContext context, const char* location ); - -/// Sets the low and high water marks for disk cache garbage collection. 
-/// -/// Garbage collection is triggered when a new entry is written to the cache and -/// the current cache data size plus the size of the cache entry that is about -/// to be inserted exceeds the high water mark. Garbage collection proceeds until -/// the size reaches the low water mark. Garbage collection will always free enough -/// space to insert the new entry without exceeding the low water mark. Setting -/// either limit to zero will disable garbage collection. An error will be returned -/// if both limits are non-zero and the high water mark is smaller than the low water mark. -/// -/// Note that garbage collection is performed only on writes to the disk cache. No garbage -/// collection is triggered on disk cache initialization or immediately when calling this function, -/// but on subsequent inserting of data into the database. -/// -/// If the size of a compiled module exceeds the value configured for the high water -/// mark and garbage collection is enabled, the module will not be added to the cache -/// and a warning will be added to the log. -/// -/// The high water mark can be overridden with the environment variable OPTIX_CACHE_MAXSIZE. -/// The environment variable takes precedence over the function parameters. The low water mark -/// will be set to half the value of OPTIX_CACHE_MAXSIZE. Setting OPTIX_CACHE_MAXSIZE to 0 will -/// disable the disk cache, but will not alter the contents of the cache. Negative and non-integer -/// values will be ignored. -/// -/// \param[in] context the device context -/// \param[in] lowWaterMark the low water mark -/// \param[in] highWaterMark the high water mark -OptixResult optixDeviceContextSetCacheDatabaseSizes( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark ); - -/// Indicates whether the disk cache is enabled or disabled. 
-/// -/// \param[in] context the device context -/// \param[out] enabled 1 if enabled, 0 if disabled -OptixResult optixDeviceContextGetCacheEnabled( OptixDeviceContext context, int* enabled ); -/// Returns the location of the disk cache. If the cache has been disabled by setting the environment -/// variable OPTIX_CACHE_MAXSIZE=0, this function will return an empy string. -/// -/// \param[in] context the device context -/// \param[out] location directory of disk cache, null terminated if locationSize > 0 -/// \param[in] locationSize locationSize -OptixResult optixDeviceContextGetCacheLocation( OptixDeviceContext context, char* location, size_t locationSize ); - -/// Returns the low and high water marks for disk cache garbage collection. If the cache has been disabled by -/// setting the environment variable OPTIX_CACHE_MAXSIZE=0, this function will return 0 for the low and high -/// water marks. -/// -/// \param[in] context the device context -/// \param[out] lowWaterMark the low water mark -/// \param[out] highWaterMark the high water mark -OptixResult optixDeviceContextGetCacheDatabaseSizes( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark ); - -//@} -/// \defgroup optix_host_api_pipelines Pipelines -/// \ingroup optix_host_api -//@{ - -/// logString is an optional buffer that contains compiler feedback and errors. This -/// information is also passed to the context logger (if enabled), however it may be -/// difficult to correlate output to the logger to specific API invocations when using -/// multiple threads. The output to logString will only contain feedback for this specific -/// invocation of this API call. -/// -/// logStringSize as input should be a pointer to the number of bytes backing logString. -/// Upon return it contains the length of the log message (including the null terminator) -/// which may be greater than the input value. In this case, the log message will be -/// truncated to fit into logString. 
-/// -/// If logString or logStringSize are NULL, no output is written to logString. If -/// logStringSize points to a value that is zero, no output is written. This does not -/// affect output to the context logger if enabled. -/// -/// \param[in] context -/// \param[in] pipelineCompileOptions -/// \param[in] pipelineLinkOptions -/// \param[in] programGroups array of ProgramGroup objects -/// \param[in] numProgramGroups number of ProgramGroup objects -/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. -/// \param[in,out] logStringSize -/// \param[out] pipeline -OptixResult optixPipelineCreate( OptixDeviceContext context, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const OptixPipelineLinkOptions* pipelineLinkOptions, - const OptixProgramGroup* programGroups, - unsigned int numProgramGroups, - char* logString, - size_t* logStringSize, - OptixPipeline* pipeline ); - -/// Thread safety: A pipeline must not be destroyed while it is still in use by concurrent API calls in other threads. -OptixResult optixPipelineDestroy( OptixPipeline pipeline ); - -/// Sets the stack sizes for a pipeline. -/// -/// Users are encouraged to see the programming guide and the implementations of the helper functions -/// to understand how to construct the stack sizes based on their particular needs. -/// -/// If this method is not used, an internal default implementation is used. The default implementation is correct (but -/// not necessarily optimal) as long as the maximum depth of call trees of CC and DC programs is at most 2 and no motion transforms are used. -/// -/// The maxTraversableGraphDepth responds to the maximal number of traversables visited when calling trace. -/// Every acceleration structure and motion transform count as one level of traversal. 
-/// E.g., for a simple IAS (instance acceleration structure) -> GAS (geometry acceleration structure) -/// traversal graph, the maxTraversableGraphDepth is two. -/// For IAS -> MT (motion transform) -> GAS, the maxTraversableGraphDepth is three. -/// Note that it does not matter whether a IAS or GAS has motion or not, it always counts as one. -/// Launching optix with exceptions turned on (see #OPTIX_EXCEPTION_FLAG_TRACE_DEPTH) will throw an exception -/// if the specified maxTraversableGraphDepth is too small. -/// -/// \param[in] pipeline The pipeline to configure the stack size for. -/// \param[in] directCallableStackSizeFromTraversal The direct stack size requirement for direct callables invoked from IS or AH. -/// \param[in] directCallableStackSizeFromState The direct stack size requirement for direct callables invoked from RG, MS, or CH. -/// \param[in] continuationStackSize The continuation stack requirement. -/// \param[in] maxTraversableGraphDepth The maximum depth of a traversable graph passed to trace. -OptixResult optixPipelineSetStackSize( OptixPipeline pipeline, - unsigned int directCallableStackSizeFromTraversal, - unsigned int directCallableStackSizeFromState, - unsigned int continuationStackSize, - unsigned int maxTraversableGraphDepth ); - -//@} -/// \defgroup optix_host_api_modules Modules -/// \ingroup optix_host_api -//@{ - -/// logString is an optional buffer that contains compiler feedback and errors. This -/// information is also passed to the context logger (if enabled), however it may be -/// difficult to correlate output to the logger to specific API invocations when using -/// multiple threads. The output to logString will only contain feedback for this specific -/// invocation of this API call. -/// -/// logStringSize as input should be a pointer to the number of bytes backing logString. -/// Upon return it contains the length of the log message (including the null terminator) -/// which may be greater than the input value. 
In this case, the log message will be -/// truncated to fit into logString. -/// -/// If logString or logStringSize are NULL, no output is written to logString. If -/// logStringSize points to a value that is zero, no output is written. This does not -/// affect output to the context logger if enabled. -/// -/// \param[in] context -/// \param[in] moduleCompileOptions -/// \param[in] pipelineCompileOptions All modules in a pipeline need to use the same values for the pipeline compile options. -/// \param[in] PTX Pointer to the PTX input string. -/// \param[in] PTXsize Parsing proceeds up to PTXsize characters, or the first NUL byte, whichever occurs first. -/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. -/// \param[in,out] logStringSize -/// \param[out] module -/// -/// \return OPTIX_ERROR_INVALID_VALUE - context is 0, moduleCompileOptions is 0, pipelineCompileOptions is 0, PTX is 0, module is 0. -OptixResult optixModuleCreateFromPTX( OptixDeviceContext context, - const OptixModuleCompileOptions* moduleCompileOptions, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const char* PTX, - size_t PTXsize, - char* logString, - size_t* logStringSize, - OptixModule* module ); - -/// This function is designed to do just enough work to create the OptixTask return -/// parameter and is expected to be fast enough run without needing parallel execution. A -/// single thread could generate all the OptixTask objects for further processing in a -/// work pool. -/// -/// Options are similar to #optixModuleCreateFromPTX(), aside from the return parameter, -/// firstTask. -/// -/// The memory used to hold the PTX should be live until all tasks are finished. -/// -/// It is illegal to call #optixModuleDestroy() if any OptixTask objects are currently -/// being executed. In that case OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE will be returned. 
-/// -/// If an invocation of optixTaskExecute fails, the OptixModule will be marked as -/// OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE if there are outstanding tasks or -/// OPTIX_MODULE_COMPILE_STATE_FAILURE if there are no outstanding tasks. Subsequent calls -/// to #optixTaskExecute() may execute additional work to collect compilation errors -/// generated from the input. Currently executing tasks will not necessarily be terminated -/// immediately but at the next opportunity. - -/// Logging will continue to be directed to the logger installed with the -/// OptixDeviceContext. If logString is provided to #optixModuleCreateFromPTXWithTasks(), -/// it will contain all the compiler feedback from all executed tasks. The lifetime of the -/// memory pointed to by logString should extend from calling -/// #optixModuleCreateFromPTXWithTasks() to when the compilation state is either -/// OPTIX_MODULE_COMPILE_STATE_FAILURE or OPTIX_MODULE_COMPILE_STATE_COMPLETED. OptiX will -/// not write to the logString outside of execution of -/// #optixModuleCreateFromPTXWithTasks() or #optixTaskExecute(). If the compilation state -/// is OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE and no further execution of -/// #optixTaskExecute() is performed the logString may be reclaimed by the application -/// before calling #optixModuleDestroy(). The contents of logString will contain output -/// from currently completed tasks. - -/// All OptixTask objects associated with a given OptixModule will be cleaned up when -/// #optixModuleDestroy() is called regardless of whether the compilation was successful -/// or not. If the compilation state is OPTIX_MODULE_COMPILE_STATE_IMPENDIND_FAILURE, any -/// unstarted OptixTask objects do not need to be executed though there is no harm doing -/// so. 
-/// -/// \see #optixModuleCreateFromPTX -OptixResult optixModuleCreateFromPTXWithTasks( OptixDeviceContext context, - const OptixModuleCompileOptions* moduleCompileOptions, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const char* PTX, - size_t PTXsize, - char* logString, - size_t* logStringSize, - OptixModule* module, - OptixTask* firstTask ); - -/// When creating a module with tasks, the current state of the module can be queried -/// using this function. -/// -/// Thread safety: Safe to call from any thread until optixModuleDestroy is called. -/// -/// \see #optixModuleCreateFromPTXWithTasks -OptixResult optixModuleGetCompilationState( OptixModule module, OptixModuleCompileState* state ); - -/// Call for OptixModule objects created with optixModuleCreateFromPTX and optixModuleDeserialize. -/// -/// Modules must not be destroyed while they are still used by any program group. -/// -/// Thread safety: A module must not be destroyed while it is still in use by concurrent API calls in other threads. -OptixResult optixModuleDestroy( OptixModule module ); - -/// Returns a module containing the intersection program for the built-in primitive type specified -/// by the builtinISOptions. This module must be used as the moduleIS for the OptixProgramGroupHitgroup -/// in any SBT record for that primitive type. (The entryFunctionNameIS should be null.) -OptixResult optixBuiltinISModuleGet( OptixDeviceContext context, - const OptixModuleCompileOptions* moduleCompileOptions, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const OptixBuiltinISOptions* builtinISOptions, - OptixModule* builtinModule ); - -//@} -/// \defgroup optix_host_api_tasks Tasks -/// \ingroup optix_host_api -//@{ - -/// Each OptixTask should be executed with #optixTaskExecute(). If additional parallel -/// work is found, new OptixTask objects will be returned in additionalTasks along with -/// the number of additional tasks in numAdditionalTasksCreated. 
The parameter -/// additionalTasks should point to a user allocated array of minimum size -/// maxNumAdditionalTasks. OptiX can generate upto maxNumAdditionalTasks additional tasks. -/// -/// Each task can be executed in parallel and in any order. -/// -/// Thread safety: Safe to call from any thread until #optixModuleDestroy() is called for -/// any associated task. -/// -/// \see #optixModuleCreateFromPTXWithTasks -/// -/// \param[in] task the OptixTask to execute -/// \param[in] additionalTasks pointer to array of OptixTask objects to be filled in -/// \param[in] maxNumAdditionalTasks maximum number of additional OptixTask objects -/// \param[out] numAdditionalTasksCreated number of OptixTask objects created by OptiX and written into #additionalTasks -OptixResult optixTaskExecute( OptixTask task, OptixTask* additionalTasks, unsigned int maxNumAdditionalTasks, unsigned int* numAdditionalTasksCreated ); - -//@} -/// \defgroup optix_host_api_program_groups Program groups -/// \ingroup optix_host_api -//@{ - -/// Returns the stack sizes for the given program group. -/// -/// \param[in] programGroup the program group -/// \param[out] stackSizes the corresponding stack sizes -OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ); - -/// logString is an optional buffer that contains compiler feedback and errors. This -/// information is also passed to the context logger (if enabled), however it may be -/// difficult to correlate output to the logger to specific API invocations when using -/// multiple threads. The output to logString will only contain feedback for this specific -/// invocation of this API call. -/// -/// logStringSize as input should be a pointer to the number of bytes backing logString. -/// Upon return it contains the length of the log message (including the null terminator) -/// which may be greater than the input value. In this case, the log message will be -/// truncated to fit into logString. 
-/// -/// If logString or logStringSize are NULL, no output is written to logString. If -/// logStringSize points to a value that is zero, no output is written. This does not -/// affect output to the context logger if enabled. -/// -/// Creates numProgramGroups OptiXProgramGroup objects from the specified -/// OptixProgramGroupDesc array. The size of the arrays must match. -/// -/// \param[in] context -/// \param[in] programDescriptions N * OptixProgramGroupDesc -/// \param[in] numProgramGroups N -/// \param[in] options -/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. -/// \param[in,out] logStringSize -/// \param[out] programGroups -OptixResult optixProgramGroupCreate( OptixDeviceContext context, - const OptixProgramGroupDesc* programDescriptions, - unsigned int numProgramGroups, - const OptixProgramGroupOptions* options, - char* logString, - size_t* logStringSize, - OptixProgramGroup* programGroups ); - -/// Thread safety: A program group must not be destroyed while it is still in use by concurrent API calls in other threads. -OptixResult optixProgramGroupDestroy( OptixProgramGroup programGroup ); - -//@} -/// \defgroup optix_host_api_launches Launches -/// \ingroup optix_host_api -//@{ - -/// Where the magic happens. -/// -/// The stream and pipeline must belong to the same device context. Multiple launches -/// may be issues in parallel from multiple threads to different streams. -/// -/// pipelineParamsSize number of bytes are copied from the device memory pointed to by -/// pipelineParams before launch. It is an error if pipelineParamsSize is greater than the -/// size of the variable declared in modules and identified by -/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. If the launch params -/// variable was optimized out or not found in the modules linked to the pipeline then -/// the pipelineParams and pipelineParamsSize parameters are ignored. 
-/// -/// sbt points to the shader binding table, which defines shader -/// groupings and their resources. See the SBT spec. -/// -/// \param[in] pipeline -/// \param[in] stream -/// \param[in] pipelineParams -/// \param[in] pipelineParamsSize -/// \param[in] sbt -/// \param[in] width number of elements to compute -/// \param[in] height number of elements to compute -/// \param[in] depth number of elements to compute -/// -/// Thread safety: In the current implementation concurrent launches to the same pipeline are not -/// supported. Concurrent launches require separate OptixPipeline objects. -OptixResult optixLaunch( OptixPipeline pipeline, - CUstream stream, - CUdeviceptr pipelineParams, - size_t pipelineParamsSize, - const OptixShaderBindingTable* sbt, - unsigned int width, - unsigned int height, - unsigned int depth ); - -/// \param[in] programGroup the program group containing the program(s) -/// \param[out] sbtRecordHeaderHostPointer the result sbt record header -OptixResult optixSbtRecordPackHeader( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer ); - -//@} -/// \defgroup optix_host_api_acceleration_structures Acceleration structures -/// \ingroup optix_host_api -//@{ - -/// \param[in] context -/// \param[in] accelOptions options for the accel build -/// \param[in] buildInputs an array of OptixBuildInput objects -/// \param[in] numBuildInputs number of elements in buildInputs (must be at least 1) -/// \param[out] bufferSizes fills in buffer sizes -OptixResult optixAccelComputeMemoryUsage( OptixDeviceContext context, - const OptixAccelBuildOptions* accelOptions, - const OptixBuildInput* buildInputs, - unsigned int numBuildInputs, - OptixAccelBufferSizes* bufferSizes ); - -/// \param[in] context -/// \param[in] stream -/// \param[in] accelOptions accel options -/// \param[in] buildInputs an array of OptixBuildInput objects -/// \param[in] numBuildInputs must be >= 1 for GAS, and == 1 for IAS -/// \param[in] tempBuffer must be a multiple of 
OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT -/// \param[in] tempBufferSizeInBytes -/// \param[in] outputBuffer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT -/// \param[in] outputBufferSizeInBytes -/// \param[out] outputHandle -/// \param[in] emittedProperties types of requested properties and output buffers -/// \param[in] numEmittedProperties number of post-build properties to populate (may be zero) -OptixResult optixAccelBuild( OptixDeviceContext context, - CUstream stream, - const OptixAccelBuildOptions* accelOptions, - const OptixBuildInput* buildInputs, - unsigned int numBuildInputs, - CUdeviceptr tempBuffer, - size_t tempBufferSizeInBytes, - CUdeviceptr outputBuffer, - size_t outputBufferSizeInBytes, - OptixTraversableHandle* outputHandle, - const OptixAccelEmitDesc* emittedProperties, - unsigned int numEmittedProperties ); - -/// Obtain relocation information, stored in OptixRelocationInfo, for a given context -/// and acceleration structure's traversable handle. -/// -/// The relocation information can be passed to optixCheckRelocationCompatibility to -/// determine if an acceleration structure, referenced by 'handle', can be relocated to a -/// different device's memory space (see #optixCheckRelocationCompatibility). -/// -/// When used with optixAccelRelocate, it provides data necessary for doing the relocation. -/// -/// If the acceleration structure data associated with 'handle' is copied multiple times, -/// the same OptixRelocationInfo can also be used on all copies. -/// -/// \param[in] context -/// \param[in] handle -/// \param[out] info -/// \return OPTIX_ERROR_INVALID_VALUE will be returned for traversable handles that are not from -/// acceleration structure builds. 
-OptixResult optixAccelGetRelocationInfo( OptixDeviceContext context, OptixTraversableHandle handle, OptixRelocationInfo* info ); - -/// Checks if an optix data structure built using another OptixDeviceContext (that was -/// used to fill in 'info') is compatible with the OptixDeviceContext specified in the -/// 'context' parameter. -/// -/// Any device is always compatible with itself. -/// -/// \param[in] context -/// \param[in] info -/// \param[out] compatible If OPTIX_SUCCESS is returned 'compatible' will have the value of either: -/// - 0: This context is not compatible with the optix data structure associated with 'info'. -/// - 1: This context is compatible. -OptixResult optixCheckRelocationCompatibility( OptixDeviceContext context, const OptixRelocationInfo* info, int* compatible ); - -/// optixAccelRelocate is called to update the acceleration structure after it has been -/// relocated. Relocation is necessary when the acceleration structure's location in device -/// memory has changed. optixAccelRelocate does not copy the memory. This function only -/// operates on the relocated memory whose new location is specified by 'targetAccel'. -/// optixAccelRelocate also returns the new OptixTraversableHandle associated with -/// 'targetAccel'. The original memory (source) is not required to be valid, only the -/// OptixRelocationInfo. -/// -/// Before calling optixAccelRelocate, optixCheckRelocationCompatibility should be -/// called to ensure the copy will be compatible with the destination device context. -/// -/// The memory pointed to by 'targetAccel' should be allocated with the same size as the -/// source acceleration. Similar to the 'outputBuffer' used in optixAccelBuild, this -/// pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. -/// -/// The memory in 'targetAccel' must be allocated as long as the accel is in use. 
-/// -/// The instance traversables referenced by an IAS and the -/// micromaps referenced by a triangle GAS may themselves require relocation. -/// 'relocateInputs' and 'numRelocateInputs' should be used to specify the relocated -/// traversables and micromaps. After relocation, the relocated accel will reference -/// these relocated traversables and micromaps instead of their sources. -/// The number of relocate inputs 'numRelocateInputs' must match the number of build -/// inputs 'numBuildInputs' used to build the source accel. Relocation inputs -/// correspond with build inputs used to build the source accel and should appear in -/// the same order (see #optixAccelBuild). -/// 'relocateInputs' and 'numRelocateInputs' may be zero, preserving any references -/// to traversables and micromaps from the source accel. -/// -/// \param[in] context -/// \param[in] stream -/// \param[in] info -/// \param[in] relocateInputs -/// \param[in] numRelocateInputs -/// \param[in] targetAccel -/// \param[in] targetAccelSizeInBytes -/// \param[out] targetHandle -OptixResult optixAccelRelocate( OptixDeviceContext context, - CUstream stream, - const OptixRelocationInfo* info, - const OptixRelocateInput* relocateInputs, - size_t numRelocateInputs, - CUdeviceptr targetAccel, - size_t targetAccelSizeInBytes, - OptixTraversableHandle* targetHandle ); - -/// After building an acceleration structure, it can be copied in a compacted form to reduce -/// memory. In order to be compacted, OPTIX_BUILD_FLAG_ALLOW_COMPACTION must be supplied in -/// OptixAccelBuildOptions::buildFlags passed to optixAccelBuild. -/// -/// 'outputBuffer' is the pointer to where the compacted acceleration structure will be -/// written. This pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. -/// -/// The size of the memory specified in 'outputBufferSizeInBytes' should be at least the -/// value computed using the OPTIX_PROPERTY_TYPE_COMPACTED_SIZE that was reported during -/// optixAccelBuild. 
-/// -/// \param[in] context -/// \param[in] stream -/// \param[in] inputHandle -/// \param[in] outputBuffer -/// \param[in] outputBufferSizeInBytes -/// \param[out] outputHandle -OptixResult optixAccelCompact( OptixDeviceContext context, - CUstream stream, - OptixTraversableHandle inputHandle, - CUdeviceptr outputBuffer, - size_t outputBufferSizeInBytes, - OptixTraversableHandle* outputHandle ); - -/// \param[in] onDevice -/// \param[in] pointer pointer to traversable allocated in OptixDeviceContext. This pointer must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT -/// \param[in] traversableType Type of OptixTraversableHandle to create -/// \param[out] traversableHandle traversable handle. traversableHandle must be in host memory -OptixResult optixConvertPointerToTraversableHandle( OptixDeviceContext onDevice, - CUdeviceptr pointer, - OptixTraversableType traversableType, - OptixTraversableHandle* traversableHandle ); - - -/// Determine the amount of memory necessary for a Opacity Micromap Array build. -/// -/// \param[in] context -/// \param[in] buildInput -/// \param[out] bufferSizes -OptixResult optixOpacityMicromapArrayComputeMemoryUsage( OptixDeviceContext context, - const OptixOpacityMicromapArrayBuildInput* buildInput, - OptixMicromapBufferSizes* bufferSizes ); - -/// Construct an array of Opacity Micromaps. -/// -/// Each triangle within an instance/GAS may reference one opacity micromap to give finer -/// control over alpha behavior. A opacity micromap consists of a set of 4^N micro-triangles -/// in a triangular uniform barycentric grid. Multiple opacity micromaps are collected (built) -/// into a opacity micromap array with this function. Each geometry in a GAS may bind a -/// single opacity micromap array and can use opacity micromaps from that array only. -/// -/// Each micro-triangle within a opacity micromap can be in one of four states: Transparent, -/// Opaque, Unknown-Transparent or Unknown-Opaque. 
During traversal, if a triangle with a -/// opacity micromap attached is intersected, the opacity micromap is queried to categorize -/// the hit as either opaque, unknown (alpha) or a miss. Geometry, ray or instance flags that -/// modify the alpha/opaque behavior are applied _after_ this opacity micromap query. -/// -/// The opacity micromap query may operate in 2-state mode (alpha testing) or 4-state mode (AHS culling), -/// depending on the opacity micromap type and ray/instance flags. When operating in 2-state -/// mode, alpha hits will not be reported, and transparent and opaque hits must be accurate. -/// -/// \param[in] context -/// \param[in] stream -/// \param[in] buildInput a single build input object referencing many opacity micromaps -/// \param[in] buffers the buffers used for build -/// \param[in/out] emittedProperties types of requested properties and output buffers -/// \param[in] numEmittedProperties number of post-build properties to populate (may be zero) -OptixResult optixOpacityMicromapArrayBuild( OptixDeviceContext context, - CUstream stream, - const OptixOpacityMicromapArrayBuildInput* buildInput, - const OptixMicromapBuffers* buffers ); - -/// Obtain relocation information, stored in OptixRelocationInfo, for a given context -/// and opacity micromap array. -/// -/// The relocation information can be passed to optixCheckRelocationCompatibility to -/// determine if a opacity micromap array, referenced by buffers, can be relocated to a -/// different device's memory space (see #optixCheckRelocationCompatibility). -/// -/// When used with optixOpacityMicromapArrayRelocate, it provides data necessary for doing the relocation. -/// -/// If the opacity micromap array data associated with 'opacityMicromapArray' is copied multiple times, -/// the same OptixRelocationInfo can also be used on all copies. 
-/// -/// \param[in] context -/// \param[in] opacityMicromapArray -/// \param[out] info -OptixResult optixOpacityMicromapArrayGetRelocationInfo( OptixDeviceContext context, CUdeviceptr opacityMicromapArray, OptixRelocationInfo* info ); - -/// optixOpacityMicromapArrayRelocate is called to update the opacity micromap array after it has been -/// relocated. Relocation is necessary when the opacity micromap array's location in device -/// memory has changed. optixOpacityMicromapArrayRelocate does not copy the memory. This function only -/// operates on the relocated memory whose new location is specified by 'targetOpacityMicromapArray'. -/// The original memory (source) is not required to be valid, only the -/// OptixRelocationInfo. -/// -/// Before calling optixOpacityMicromapArrayRelocate, optixCheckRelocationCompatibility should be called -/// to ensure the copy will be compatible with the destination device context. -/// -/// The memory pointed to by 'targetOpacityMicromapArray' should be allocated with the same size as the -/// source opacity micromap array. Similar to the 'OptixMicromapBuffers::output' used in optixOpacityMicromapArrayBuild, -/// this pointer must be a multiple of OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT. -/// -/// The memory in 'targetOpacityMicromapArray' must be allocated as long as the opacity micromap array is in use. -/// -/// Note that any Acceleration Structures build using the original memory (source) as input will -/// still be associated with this original memory. 
To associate an existing (possibly relocated) -/// Acceleration Structures with the relocated opacity micromap array, use optixAccelBuild -/// to update the existing Acceleration Structures (See OPTIX_BUILD_OPERATION_UPDATE) -/// -/// \param[in] context -/// \param[in] stream -/// \param[in] info -/// \param[in] targetOpacityMicromapArray -/// \param[in] targetOpacityMicromapArraySizeInBytes -OptixResult optixOpacityMicromapArrayRelocate( OptixDeviceContext context, - CUstream stream, - const OptixRelocationInfo* info, - CUdeviceptr targetOpacityMicromapArray, - size_t targetOpacityMicromapArraySizeInBytes ); - - - -//@} -/// \defgroup optix_host_api_denoiser Denoiser -/// \ingroup optix_host_api -//@{ - -/// Creates a denoiser object with the given options, using built-in inference models -/// -/// 'modelKind' selects the model used for inference. -/// Inference for the built-in models can be guided (giving hints to improve image quality) with -/// albedo and normal vector images in the guide layer (see 'optixDenoiserInvoke'). -/// Use of these images must be enabled in 'OptixDenoiserOptions'. -/// -/// \param[in] context -/// \param[in] modelKind -/// \param[in] options -/// \param[out] denoiser -OptixResult optixDenoiserCreate( OptixDeviceContext context, - OptixDenoiserModelKind modelKind, - const OptixDenoiserOptions* options, - OptixDenoiser* denoiser ); - -/// Creates a denoiser object with the given options, using a provided inference model -/// -/// 'userData' and 'userDataSizeInBytes' provide a user model for inference. -/// The memory passed in userData will be accessed only during the invocation of this function and -/// can be freed after it returns. -/// The user model must export only one weight set which determines both the model kind and the -/// required set of guide images. 
-/// -/// \param[in] context -/// \param[in] userData -/// \param[in] userDataSizeInBytes -/// \param[out] denoiser -OptixResult optixDenoiserCreateWithUserModel( OptixDeviceContext context, - const void* userData, size_t userDataSizeInBytes, OptixDenoiser* denoiser ); - -/// Destroys the denoiser object and any associated host resources. -OptixResult optixDenoiserDestroy( OptixDenoiser denoiser ); - -/// Computes the GPU memory resources required to execute the denoiser. -/// -/// Memory for state and scratch buffers must be allocated with the sizes in 'returnSizes' and scratch memory -/// passed to optixDenoiserSetup, optixDenoiserInvoke, -/// optixDenoiserComputeIntensity and optixDenoiserComputeAverageColor. -/// For tiled denoising an overlap area ('overlapWindowSizeInPixels') must be added to each tile on all sides -/// which increases the amount of -/// memory needed to denoise a tile. In case of tiling use withOverlapScratchSizeInBytes for scratch memory size. -/// If only full resolution images are denoised, withoutOverlapScratchSizeInBytes can be used which is always -/// smaller than withOverlapScratchSizeInBytes. -/// -/// 'outputWidth' and 'outputHeight' is the dimension of the image to be denoised (without overlap in case tiling -/// is being used). -/// 'outputWidth' and 'outputHeight' must be greater than or equal to the dimensions passed to optixDenoiserSetup. -/// -/// \param[in] denoiser -/// \param[in] outputWidth -/// \param[in] outputHeight -/// \param[out] returnSizes -OptixResult optixDenoiserComputeMemoryResources( const OptixDenoiser denoiser, - unsigned int outputWidth, - unsigned int outputHeight, - OptixDenoiserSizes* returnSizes ); - -/// Initializes the state required by the denoiser. -/// -/// 'inputWidth' and 'inputHeight' must include overlap on both sides of the image if tiling is being used. The overlap is -/// returned by #optixDenoiserComputeMemoryResources. 
-/// For subsequent calls to #optixDenoiserInvoke 'inputWidth' and 'inputHeight' are the maximum dimensions -/// of the input layers. Dimensions of the input layers passed to #optixDenoiserInvoke may be different in each -/// invocation however they always must be smaller than 'inputWidth' and 'inputHeight' passed to #optixDenoiserSetup. -/// -/// \param[in] denoiser -/// \param[in] stream -/// \param[in] inputWidth -/// \param[in] inputHeight -/// \param[in] denoiserState -/// \param[in] denoiserStateSizeInBytes -/// \param[in] scratch -/// \param[in] scratchSizeInBytes -OptixResult optixDenoiserSetup( OptixDenoiser denoiser, - CUstream stream, - unsigned int inputWidth, - unsigned int inputHeight, - CUdeviceptr denoiserState, - size_t denoiserStateSizeInBytes, - CUdeviceptr scratch, - size_t scratchSizeInBytes ); - -/// Invokes denoiser on a set of input data and produces at least one output image. -/// State memory must be available during the execution of the -/// denoiser (or until optixDenoiserSetup is called with a new state memory pointer). -/// Scratch memory passed is used only for the duration of this function. -/// Scratch and state memory sizes must have a size greater than or equal to the sizes as returned by -/// optixDenoiserComputeMemoryResources. -/// -/// 'inputOffsetX' and 'inputOffsetY' are pixel offsets in the 'inputLayers' image -/// specifying the beginning of the image without overlap. When denoising an entire image without tiling -/// there is no overlap and 'inputOffsetX' and 'inputOffsetY' must be zero. When denoising a tile which is -/// adjacent to one of the four sides of the entire image the corresponding offsets must also be zero since -/// there is no overlap at the side adjacent to the image border. -/// -/// 'guideLayer' provides additional information to the denoiser. When providing albedo and normal vector -/// guide images, the corresponding fields in the 'OptixDenoiserOptions' must be -/// enabled, see #optixDenoiserCreate. 
-/// 'guideLayer' must not be null. If a guide image in 'OptixDenoiserOptions' is not enabled, the -/// corresponding image in 'OptixDenoiserGuideLayer' is ignored. -/// -/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, a 2d flow -/// image must be given in 'OptixDenoiserGuideLayer'. -/// It describes for each pixel the flow from the previous to the current frame (a 2d vector in pixel space). -/// The denoised beauty/AOV of the previous frame must be given in 'previousOutput'. -/// If this image is not available in the first frame of a sequence, the noisy beauty/AOV from the first frame -/// and zero flow vectors could be given as a substitute. -/// For non-temporal model kinds the flow image in 'OptixDenoiserGuideLayer' is ignored. -/// 'previousOutput' and -/// 'output' may refer to the same buffer, i.e. 'previousOutput' is first read by this function and later -/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. -/// In other model kinds (not temporal) 'previousOutput' is ignored. -/// -/// The beauty layer must be given as the first entry in 'layers'. -/// In AOV type model kinds (OPTIX_DENOISER_MODEL_KIND_AOV or in user defined models implementing -/// kernel-prediction) additional layers for the AOV images can be given. -/// In each layer the noisy input image is given in 'input', the denoised output is written into the -/// 'output' image. input and output images may refer to the same buffer, with the restriction that -/// the pixel formats must be identical for input and output when the blend mode is selected (see -/// #OptixDenoiserParams). -/// -/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the denoised -/// image from the previous frame must be given in 'previousOutput' in the layer. 'previousOutput' and -/// 'output' may refer to the same buffer, i.e. 
'previousOutput' is first read by this function and later -/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. -/// In other model kinds (not temporal) 'previousOutput' is ignored. -/// -/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the -/// normal vector guide image must be given as 3d vectors in camera space. In the other models only -/// the x and y channels are used and other channels are ignored. -/// -/// \param[in] denoiser -/// \param[in] stream -/// \param[in] params -/// \param[in] denoiserState -/// \param[in] denoiserStateSizeInBytes -/// \param[in] guideLayer -/// \param[in] layers -/// \param[in] numLayers -/// \param[in] inputOffsetX -/// \param[in] inputOffsetY -/// \param[in] scratch -/// \param[in] scratchSizeInBytes -OptixResult optixDenoiserInvoke( OptixDenoiser denoiser, - CUstream stream, - const OptixDenoiserParams* params, - CUdeviceptr denoiserState, - size_t denoiserStateSizeInBytes, - const OptixDenoiserGuideLayer* guideLayer, - const OptixDenoiserLayer* layers, - unsigned int numLayers, - unsigned int inputOffsetX, - unsigned int inputOffsetY, - CUdeviceptr scratch, - size_t scratchSizeInBytes ); - -/// Computes the logarithmic average intensity of the given image. The returned value 'outputIntensity' -/// is multiplied with the RGB values of the input image/tile in optixDenoiserInvoke if given in the parameter -/// OptixDenoiserParams::hdrIntensity (otherwise 'hdrIntensity' must be a null pointer). This is useful for -/// denoising HDR images which are very dark or bright. -/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get -/// consistent results. -/// -/// For each RGB pixel in the inputImage the intensity is calculated and summed if it is greater than 1e-8f: -/// intensity = log(r * 0.212586f + g * 0.715170f + b * 0.072200f). 
-/// The function returns 0.18 / exp(sum of intensities / number of summed pixels). -/// More details could be found in the Reinhard tonemapping paper: -/// http://www.cmap.polytechnique.fr/~peyre/cours/x2005signal/hdr_photographic.pdf -/// -/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. -/// -/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float. -/// -/// \param[in] denoiser -/// \param[in] stream -/// \param[in] inputImage -/// \param[out] outputIntensity single float -/// \param[in] scratch -/// \param[in] scratchSizeInBytes -OptixResult optixDenoiserComputeIntensity( OptixDenoiser denoiser, - CUstream stream, - const OptixImage2D* inputImage, - CUdeviceptr outputIntensity, - CUdeviceptr scratch, - size_t scratchSizeInBytes ); - -/// Compute average logarithmic for each of the first three channels for the given image. -/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get -/// consistent results. -/// -/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. -/// -/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float. 
-/// -/// \param[in] denoiser -/// \param[in] stream -/// \param[in] inputImage -/// \param[out] outputAverageColor three floats -/// \param[in] scratch -/// \param[in] scratchSizeInBytes -OptixResult optixDenoiserComputeAverageColor( OptixDenoiser denoiser, - CUstream stream, - const OptixImage2D* inputImage, - CUdeviceptr outputAverageColor, - CUdeviceptr scratch, - size_t scratchSizeInBytes ); - -//@} - -#ifdef __cplusplus -} -#endif - -#include "optix_function_table.h" - -#endif // __optix_optix_7_host_h__ diff --git a/zenovis/xinxinoptix/include/optix_7_types.h b/zenovis/xinxinoptix/include/optix_7_types.h deleted file mode 100644 index e9dbcf6437..0000000000 --- a/zenovis/xinxinoptix/include/optix_7_types.h +++ /dev/null @@ -1,2250 +0,0 @@ - -/* - * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. - * - * NVIDIA Corporation and its licensors retain all intellectual property and proprietary - * rights in and to this software, related documentation and any modifications thereto. - * Any use, reproduction, disclosure or distribution of this software and related - * documentation without an express license agreement from NVIDIA Corporation is strictly - * prohibited. - * - * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* - * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, - * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY - * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT - * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF - * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR - * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGES - */ - -/// @file -/// @author NVIDIA Corporation -/// @brief OptiX public API header -/// -/// OptiX types include file -- defines types and enums used by the API. -/// For the math library routines include optix_math.h - -#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) -#error("optix_7_types.h is an internal header file and must not be used directly. Please use optix_types.h, optix_host.h, optix_device.h or optix.h instead.") -#endif - -#ifndef __optix_optix_7_types_h__ -#define __optix_optix_7_types_h__ - -#if !defined(__CUDACC_RTC__) -#include /* for size_t */ -#endif - - - -/// \defgroup optix_types Types -/// \brief OptiX Types - -/** \addtogroup optix_types -@{ -*/ - -// This typedef should match the one in cuda.h in order to avoid compilation errors. 
-#if defined(_WIN64) || defined(__LP64__) -/// CUDA device pointer -typedef unsigned long long CUdeviceptr; -#else -/// CUDA device pointer -typedef unsigned int CUdeviceptr; -#endif - -/// Opaque type representing a device context -typedef struct OptixDeviceContext_t* OptixDeviceContext; - -/// Opaque type representing a module -typedef struct OptixModule_t* OptixModule; - -/// Opaque type representing a program group -typedef struct OptixProgramGroup_t* OptixProgramGroup; - -/// Opaque type representing a pipeline -typedef struct OptixPipeline_t* OptixPipeline; - -/// Opaque type representing a denoiser instance -typedef struct OptixDenoiser_t* OptixDenoiser; - -/// Opaque type representing a work task -typedef struct OptixTask_t* OptixTask; - -/// Traversable handle -typedef unsigned long long OptixTraversableHandle; - -/// Visibility mask -typedef unsigned int OptixVisibilityMask; - -/// Size of the SBT record headers. -#define OPTIX_SBT_RECORD_HEADER_SIZE ( (size_t)32 ) - -/// Alignment requirement for device pointers in OptixShaderBindingTable. -#define OPTIX_SBT_RECORD_ALIGNMENT 16ull - -/// Alignment requirement for output and temporay buffers for acceleration structures. -#define OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT 128ull - -/// Alignment requirement for OptixBuildInputInstanceArray::instances. -#define OPTIX_INSTANCE_BYTE_ALIGNMENT 16ull - -/// Alignment requirement for OptixBuildInputCustomPrimitiveArray::aabbBuffers -#define OPTIX_AABB_BUFFER_BYTE_ALIGNMENT 8ull - -/// Alignment requirement for OptixBuildInputTriangleArray::preTransform -#define OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT 16ull - -/// Alignment requirement for OptixStaticTransform, OptixMatrixMotionTransform, OptixSRTMotionTransform. -#define OPTIX_TRANSFORM_BYTE_ALIGNMENT 64ull - -/// Maximum number of registers allowed. Defaults to no explicit limit. -#define OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT 0 - -/// Maximum number of payload types allowed. 
-#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT 8 - -/// Maximum number of payload values allowed. -#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT 32 - -/// Opacity micromaps encode the states of microtriangles in either 1 bit (2-state) or 2 bits (4-state) using -/// the following values. -#define OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT ( 0 ) -#define OPTIX_OPACITY_MICROMAP_STATE_OPAQUE ( 1 ) -#define OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT ( 2 ) -#define OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE ( 3 ) - -/// Predefined index to indicate that a triangle in the BVH build doesn't have an associated opacity micromap, -/// and that it should revert to one of the four possible states for the full triangle. -#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT ( -1 ) -#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE ( -2 ) -#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT ( -3 ) -#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE ( -4 ) - -/// Alignment requirement for opacity micromap array buffers -#define OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT 128ull - -/// Maximum subdivision level for opacity micromaps -#define OPTIX_OPACITY_MICROMAP_MAX_SUBDIVISION_LEVEL 12 - - -/// Result codes returned from API functions -/// -/// All host side API functions return OptixResult with the exception of optixGetErrorName -/// and optixGetErrorString. When successful OPTIX_SUCCESS is returned. All return codes -/// except for OPTIX_SUCCESS should be assumed to be errors as opposed to a warning. 
-/// -/// \see #optixGetErrorName(), #optixGetErrorString() -typedef enum OptixResult -{ - OPTIX_SUCCESS = 0, - OPTIX_ERROR_INVALID_VALUE = 7001, - OPTIX_ERROR_HOST_OUT_OF_MEMORY = 7002, - OPTIX_ERROR_INVALID_OPERATION = 7003, - OPTIX_ERROR_FILE_IO_ERROR = 7004, - OPTIX_ERROR_INVALID_FILE_FORMAT = 7005, - OPTIX_ERROR_DISK_CACHE_INVALID_PATH = 7010, - OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR = 7011, - OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR = 7012, - OPTIX_ERROR_DISK_CACHE_INVALID_DATA = 7013, - OPTIX_ERROR_LAUNCH_FAILURE = 7050, - OPTIX_ERROR_INVALID_DEVICE_CONTEXT = 7051, - OPTIX_ERROR_CUDA_NOT_INITIALIZED = 7052, - OPTIX_ERROR_VALIDATION_FAILURE = 7053, - OPTIX_ERROR_INVALID_PTX = 7200, - OPTIX_ERROR_INVALID_LAUNCH_PARAMETER = 7201, - OPTIX_ERROR_INVALID_PAYLOAD_ACCESS = 7202, - OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS = 7203, - OPTIX_ERROR_INVALID_FUNCTION_USE = 7204, - OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS = 7205, - OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY = 7250, - OPTIX_ERROR_PIPELINE_LINK_ERROR = 7251, - OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE = 7270, - OPTIX_ERROR_INTERNAL_COMPILER_ERROR = 7299, - OPTIX_ERROR_DENOISER_MODEL_NOT_SET = 7300, - OPTIX_ERROR_DENOISER_NOT_INITIALIZED = 7301, - OPTIX_ERROR_NOT_COMPATIBLE = 7400, - OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH = 7500, - OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED = 7501, - OPTIX_ERROR_PAYLOAD_TYPE_ID_INVALID = 7502, - OPTIX_ERROR_NOT_SUPPORTED = 7800, - OPTIX_ERROR_UNSUPPORTED_ABI_VERSION = 7801, - OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH = 7802, - OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS = 7803, - OPTIX_ERROR_LIBRARY_NOT_FOUND = 7804, - OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND = 7805, - OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE = 7806, - OPTIX_ERROR_DEVICE_OUT_OF_MEMORY = 7807, - OPTIX_ERROR_CUDA_ERROR = 7900, - OPTIX_ERROR_INTERNAL_ERROR = 7990, - OPTIX_ERROR_UNKNOWN = 7999, -} OptixResult; - -/// Parameters used for #optixDeviceContextGetProperty() -/// -/// \see #optixDeviceContextGetProperty() -typedef enum 
OptixDeviceProperty -{ - /// Maximum value for OptixPipelineLinkOptions::maxTraceDepth. sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRACE_DEPTH = 0x2001, - - /// Maximum value to pass into optixPipelineSetStackSize for parameter - /// maxTraversableGraphDepth. sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRAVERSABLE_GRAPH_DEPTH = 0x2002, - - /// The maximum number of primitives (over all build inputs) as input to a single - /// Geometry Acceleration Structure (GAS). sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_MAX_PRIMITIVES_PER_GAS = 0x2003, - - /// The maximum number of instances (over all build inputs) as input to a single - /// Instance Acceleration Structure (IAS). sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCES_PER_IAS = 0x2004, - - /// The RT core version supported by the device (0 for no support, 10 for version - /// 1.0). sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_RTCORE_VERSION = 0x2005, - - /// The maximum value for #OptixInstance::instanceId. sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID = 0x2006, - - /// The number of bits available for the #OptixInstance::visibilityMask. - /// Higher bits must be set to zero. sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK = 0x2007, - - /// The maximum number of instances that can be added to a single Instance - /// Acceleration Structure (IAS). sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_RECORDS_PER_GAS = 0x2008, - - /// The maximum value for #OptixInstance::sbtOffset. sizeof( unsigned int ) - OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_OFFSET = 0x2009, -} OptixDeviceProperty; - -/// Type of the callback function used for log messages. -/// -/// \param[in] level The log level indicates the severity of the message. See below for -/// possible values. -/// \param[in] tag A terse message category description (e.g., 'SCENE STAT'). 
-/// \param[in] message Null terminated log message (without newline at the end). -/// \param[in] cbdata Callback data that was provided with the callback pointer. -/// -/// It is the users responsibility to ensure thread safety within this function. -/// -/// The following log levels are defined. -/// -/// 0 disable Setting the callback level will disable all messages. The callback -/// function will not be called in this case. -/// 1 fatal A non-recoverable error. The context and/or OptiX itself might no longer -/// be in a usable state. -/// 2 error A recoverable error, e.g., when passing invalid call parameters. -/// 3 warning Hints that OptiX might not behave exactly as requested by the user or -/// may perform slower than expected. -/// 4 print Status or progress messages. -/// -/// Higher levels might occur. -/// -/// \see #optixDeviceContextSetLogCallback(), #OptixDeviceContextOptions -typedef void ( *OptixLogCallback )( unsigned int level, const char* tag, const char* message, void* cbdata ); - -/// Validation mode settings. -/// -/// When enabled, certain device code utilities will be enabled to provide as good debug and -/// error checking facilities as possible. -/// -/// -/// \see #optixDeviceContextCreate() -typedef enum OptixDeviceContextValidationMode -{ - OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_OFF = 0, - OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL = 0xFFFFFFFF -} OptixDeviceContextValidationMode; - -/// Parameters used for #optixDeviceContextCreate() -/// -/// \see #optixDeviceContextCreate() -typedef struct OptixDeviceContextOptions -{ - /// Function pointer used when OptiX wishes to generate messages - OptixLogCallback logCallbackFunction; - /// Pointer stored and passed to logCallbackFunction when a message is generated - void* logCallbackData; - /// Maximum callback level to generate message for (see #OptixLogCallback) - int logCallbackLevel; - /// Validation mode of context. 
- OptixDeviceContextValidationMode validationMode; -} OptixDeviceContextOptions; - -/// Flags used by #OptixBuildInputTriangleArray::flags -/// and #OptixBuildInput::flag -/// and #OptixBuildInputCustomPrimitiveArray::flags -typedef enum OptixGeometryFlags -{ - /// No flags set - OPTIX_GEOMETRY_FLAG_NONE = 0, - - /// Disables the invocation of the anyhit program. - /// Can be overridden by OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT and OPTIX_RAY_FLAG_ENFORCE_ANYHIT. - OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT = 1u << 0, - - /// If set, an intersection with the primitive will trigger one and only one - /// invocation of the anyhit program. Otherwise, the anyhit program may be invoked - /// more than once. - OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL = 1u << 1, - - /// Prevent triangles from getting culled due to their orientation. - /// Effectively ignores ray flags - /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. - OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 2, -} OptixGeometryFlags; - -/// Legacy type: A subset of the hit kinds for built-in primitive intersections. -/// It is preferred to use optixGetPrimitiveType(), together with -/// optixIsFrontFaceHit() or optixIsBackFaceHit(). -/// -/// \see #optixGetHitKind() -typedef enum OptixHitKind -{ - /// Ray hit the triangle on the front face - OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE = 0xFE, - /// Ray hit the triangle on the back face - OPTIX_HIT_KIND_TRIANGLE_BACK_FACE = 0xFF -} OptixHitKind; - -/// Format of indices used int #OptixBuildInputTriangleArray::indexFormat. 
-typedef enum OptixIndicesFormat -{ - /// No indices, this format must only be used in combination with triangle soups, i.e., numIndexTriplets must be zero - OPTIX_INDICES_FORMAT_NONE = 0, - /// Three shorts - OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 = 0x2102, - /// Three ints - OPTIX_INDICES_FORMAT_UNSIGNED_INT3 = 0x2103 -} OptixIndicesFormat; - -/// Format of vertices used in #OptixBuildInputTriangleArray::vertexFormat. -typedef enum OptixVertexFormat -{ - OPTIX_VERTEX_FORMAT_NONE = 0, ///< No vertices - OPTIX_VERTEX_FORMAT_FLOAT3 = 0x2121, ///< Vertices are represented by three floats - OPTIX_VERTEX_FORMAT_FLOAT2 = 0x2122, ///< Vertices are represented by two floats - OPTIX_VERTEX_FORMAT_HALF3 = 0x2123, ///< Vertices are represented by three halfs - OPTIX_VERTEX_FORMAT_HALF2 = 0x2124, ///< Vertices are represented by two halfs - OPTIX_VERTEX_FORMAT_SNORM16_3 = 0x2125, - OPTIX_VERTEX_FORMAT_SNORM16_2 = 0x2126 -} OptixVertexFormat; - -/// Format of transform used in #OptixBuildInputTriangleArray::transformFormat. -typedef enum OptixTransformFormat -{ - OPTIX_TRANSFORM_FORMAT_NONE = 0, ///< no transform, default for zero initialization - OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 = 0x21E1, ///< 3x4 row major affine matrix -} OptixTransformFormat; - - -/// Specifies whether to use a 2- or 4-state opacity micromap format. -typedef enum OptixOpacityMicromapFormat -{ - /// invalid format - OPTIX_OPACITY_MICROMAP_FORMAT_NONE = 0, - /// 0: Transparent, 1: Opaque - OPTIX_OPACITY_MICROMAP_FORMAT_2_STATE = 1, - /// 0: Transparent, 1: Opaque, 2: Unknown-Transparent, 3: Unknown-Opaque - OPTIX_OPACITY_MICROMAP_FORMAT_4_STATE = 2, -} OptixOpacityMicromapFormat; - -/// indexing mode of triangles to opacity micromaps in an array, used in #OptixBuildInputOpacityMicromap. 
-typedef enum OptixOpacityMicromapArrayIndexingMode -{ - /// No opacity micromap is used - OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE = 0, - /// An implicit linear mapping of triangles to opacity micromaps in the - /// opacity micromap array is used. triangle[i] will use opacityMicromapArray[i]. - OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR = 1, - /// OptixBuildInputVisibleMap::indexBuffer provides a per triangle array of predefined indices - /// and/or indices into OptixBuildInputVisibleMap::opacityMicromapArray. - /// See OptixBuildInputOpacityMicromap::indexBuffer for more details. - OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED = 2, -} OptixOpacityMicromapArrayIndexingMode; - -/// Opacity micromap usage count for acceleration structure builds. -/// Specifies how many opacity micromaps of a specific type are referenced by triangles when building the AS. -/// Note that while this is similar to OptixOpacityMicromapHistogramEntry, the usage count specifies how many opacity micromaps -/// of a specific type are referenced by triangles in the AS. -typedef struct OptixOpacityMicromapUsageCount -{ - /// Number of opacity micromaps with this format and subdivision level referenced by triangles in the corresponding - /// triangle build input at AS build time. - unsigned int count; - /// Number of micro-triangles is 4^level. Valid levels are [0, 12] - unsigned int subdivisionLevel; - /// opacity micromap format. - OptixOpacityMicromapFormat format; -} OptixOpacityMicromapUsageCount; - -typedef struct OptixBuildInputOpacityMicromap -{ - /// Indexing mode of triangle to opacity micromap array mapping. - OptixOpacityMicromapArrayIndexingMode indexingMode; - - /// Device pointer to a opacity micromap array used by this build input array. - /// This buffer is required when #OptixBuildInputOpacityMicromap::indexingMode is - /// OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR or OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. 
- /// Must be zero if #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE. - CUdeviceptr opacityMicromapArray; - - /// int16 or int32 buffer specifying which opacity micromap index to use for each triangle. - /// Instead of an actual index, one of the predefined indices - /// OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_(FULLY_TRANSPARENT | FULLY_OPAQUE | FULLY_UNKNOWN_TRANSPARENT | FULLY_UNKNOWN_OPAQUE) - /// can be used to indicate that there is no opacity micromap for this particular triangle - /// but the triangle is in a uniform state and the selected behavior is applied - /// to the entire triangle. - /// This buffer is required when #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. - /// Must be zero if #OptixBuildInputOpacityMicromap::indexingMode is - /// OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR or OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE. - CUdeviceptr indexBuffer; - - /// 0, 2 or 4 (unused, 16 or 32 bit) - /// Must be non-zero when #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. - unsigned int indexSizeInBytes; - - /// Opacity micromap index buffer stride. If set to zero, indices are assumed to be tightly - /// packed and stride is inferred from #OptixBuildInputOpacityMicromap::indexSizeInBytes. - unsigned int indexStrideInBytes; - - /// Constant offset to non-negative opacity micromap indices - unsigned int indexOffset; - - /// Number of OptixOpacityMicromapUsageCount. - unsigned int numMicromapUsageCounts; - /// List of number of usages of opacity micromaps of format and subdivision combinations. - /// Counts with equal format and subdivision combination (duplicates) are added together. 
- const OptixOpacityMicromapUsageCount* micromapUsageCounts; -} OptixBuildInputOpacityMicromap; - -typedef struct OptixRelocateInputOpacityMicromap -{ - /// Device pointer to a reloated opacity micromap array used by the source build input array. - /// May be zero when no micromaps where used in the source accel, or the referenced opacity - /// micromaps don't require relocation (for example relocation of a GAS on the source device). - CUdeviceptr opacityMicromapArray; -} OptixRelocateInputOpacityMicromap; - - -/// Triangle inputs -/// -/// \see #OptixBuildInput::triangleArray -typedef struct OptixBuildInputTriangleArray -{ - /// Points to host array of device pointers, one per motion step. Host array size must match the number of - /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set - /// to 0 or 1). Each per motion key device pointer must point to an array of vertices of the - /// triangles in the format as described by vertexFormat. The minimum alignment must match the natural - /// alignment of the type as specified in the vertexFormat, i.e., for OPTIX_VERTEX_FORMAT_FLOATX 4-byte, - /// for all others a 2-byte alignment. However, an 16-byte stride (and buffer alignment) is recommended for - /// vertices of format OPTIX_VERTEX_FORMAT_FLOAT3 for GAS build performance. - const CUdeviceptr* vertexBuffers; - - /// Number of vertices in each of buffer in OptixBuildInputTriangleArray::vertexBuffers. - unsigned int numVertices; - - /// \see #OptixVertexFormat - OptixVertexFormat vertexFormat; - - /// Stride between vertices. If set to zero, vertices are assumed to be tightly - /// packed and stride is inferred from vertexFormat. - unsigned int vertexStrideInBytes; - - /// Optional pointer to array of 16 or 32-bit int triplets, one triplet per triangle. 
- /// The minimum alignment must match the natural alignment of the type as specified in the indexFormat, i.e., - /// for OPTIX_INDICES_FORMAT_UNSIGNED_INT3 4-byte and for OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 a 2-byte alignment. - CUdeviceptr indexBuffer; - - /// Size of array in OptixBuildInputTriangleArray::indexBuffer. For build, needs to be zero if indexBuffer is \c nullptr. - unsigned int numIndexTriplets; - - /// \see #OptixIndicesFormat - OptixIndicesFormat indexFormat; - - /// Stride between triplets of indices. If set to zero, indices are assumed to be tightly - /// packed and stride is inferred from indexFormat. - unsigned int indexStrideInBytes; - - /// Optional pointer to array of floats - /// representing a 3x4 row major affine - /// transformation matrix. This pointer must be a multiple of OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT - CUdeviceptr preTransform; - - /// Array of flags, to specify flags per sbt record, - /// combinations of OptixGeometryFlags describing the - /// primitive behavior, size must match numSbtRecords - const unsigned int* flags; - - /// Number of sbt records available to the sbt index offset override. - unsigned int numSbtRecords; - - /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. - /// Every entry must be in range [0,numSbtRecords-1]. - /// Size needs to be the number of primitives. - CUdeviceptr sbtIndexOffsetBuffer; - - /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). - unsigned int sbtIndexOffsetSizeInBytes; - - /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly - /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). - unsigned int sbtIndexOffsetStrideInBytes; - - /// Primitive index bias, applied in optixGetPrimitiveIndex(). - /// Sum of primitiveIndexOffset and number of triangles must not overflow 32bits. 
- unsigned int primitiveIndexOffset; - - /// \see #OptixTransformFormat - OptixTransformFormat transformFormat; - - /// Optional opacity micromap inputs. - OptixBuildInputOpacityMicromap opacityMicromap; - -} OptixBuildInputTriangleArray; - -/// Triangle inputs -/// -/// \see #OptixRelocateInput::triangleArray -typedef struct OptixRelocateInputTriangleArray -{ - /// Number of sbt records available to the sbt index offset override. - /// Must match #OptixBuildInputTriangleArray::numSbtRecords of the source build input. - unsigned int numSbtRecords; - - /// Opacity micromap inputs. - OptixRelocateInputOpacityMicromap opacityMicromap; -} OptixRelocateInputTriangleArray; - -/// Builtin primitive types -/// -typedef enum OptixPrimitiveType -{ - /// Custom primitive. - OPTIX_PRIMITIVE_TYPE_CUSTOM = 0x2500, - /// B-spline curve of degree 2 with circular cross-section. - OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE = 0x2501, - /// B-spline curve of degree 3 with circular cross-section. - OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE = 0x2502, - /// Piecewise linear curve with circular cross-section. - OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR = 0x2503, - /// CatmullRom curve with circular cross-section. - OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM = 0x2504, - OPTIX_PRIMITIVE_TYPE_SPHERE = 0x2506, - /// Triangle. - OPTIX_PRIMITIVE_TYPE_TRIANGLE = 0x2531, -} OptixPrimitiveType; - -/// Builtin flags may be bitwise combined. -/// -/// \see #OptixPipelineCompileOptions::usesPrimitiveTypeFlags -typedef enum OptixPrimitiveTypeFlags -{ - /// Custom primitive. - OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM = 1 << 0, - /// B-spline curve of degree 2 with circular cross-section. - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE = 1 << 1, - /// B-spline curve of degree 3 with circular cross-section. - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE = 1 << 2, - /// Piecewise linear curve with circular cross-section. 
- OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR = 1 << 3, - /// CatmullRom curve with circular cross-section. - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM = 1 << 4, - OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE = 1 << 6, - /// Triangle. - OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE = 1 << 31, -} OptixPrimitiveTypeFlags; - -/// Curve end cap types, for non-linear curves -/// -typedef enum OptixCurveEndcapFlags -{ - /// Default end caps. Round end caps for linear, no end caps for quadratic/cubic. - OPTIX_CURVE_ENDCAP_DEFAULT = 0, - /// Flat end caps at both ends of quadratic/cubic curve segments. Not valid for linear. - OPTIX_CURVE_ENDCAP_ON = 1 << 0, -} OptixCurveEndcapFlags; - -/// Curve inputs -/// -/// A curve is a swept surface defined by a 3D spline curve and a varying width (radius). A curve (or "strand") of -/// degree d (3=cubic, 2=quadratic, 1=linear) is represented by N > d vertices and N width values, and comprises N - d segments. -/// Each segment is defined by d+1 consecutive vertices. Each curve may have a different number of vertices. -/// -/// OptiX describes the curve array as a list of curve segments. The primitive id is the segment number. -/// It is the user's responsibility to maintain a mapping between curves and curve segments. -/// Each index buffer entry i = indexBuffer[primid] specifies the start of a curve segment, -/// represented by d+1 consecutive vertices in the vertex buffer, -/// and d+1 consecutive widths in the width buffer. Width is interpolated the same -/// way vertices are interpolated, that is, using the curve basis. -/// -/// Each curves build input has only one SBT record. -/// To create curves with different materials in the same BVH, use multiple build inputs. -/// -/// \see #OptixBuildInput::curveArray -typedef struct OptixBuildInputCurveArray -{ - /// Curve degree and basis - /// \see #OptixPrimitiveType - OptixPrimitiveType curveType; - /// Number of primitives. Each primitive is a polynomial curve segment. 
- unsigned int numPrimitives; - - /// Pointer to host array of device pointers, one per motion step. Host array size must match number of - /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set - /// to 1). Each per-motion-key device pointer must point to an array of floats (the vertices of the - /// curves). - const CUdeviceptr* vertexBuffers; - /// Number of vertices in each buffer in vertexBuffers. - unsigned int numVertices; - /// Stride between vertices. If set to zero, vertices are assumed to be tightly - /// packed and stride is sizeof( float3 ). - unsigned int vertexStrideInBytes; - - /// Parallel to vertexBuffers: a device pointer per motion step, each with numVertices float values, - /// specifying the curve width (radius) corresponding to each vertex. - const CUdeviceptr* widthBuffers; - /// Stride between widths. If set to zero, widths are assumed to be tightly - /// packed and stride is sizeof( float ). - unsigned int widthStrideInBytes; - - /// Reserved for future use. - const CUdeviceptr* normalBuffers; - /// Reserved for future use. - unsigned int normalStrideInBytes; - - /// Device pointer to array of unsigned ints, one per curve segment. - /// This buffer is required (unlike for OptixBuildInputTriangleArray). - /// Each index is the start of degree+1 consecutive vertices in vertexBuffers, - /// and corresponding widths in widthBuffers and normals in normalBuffers. - /// These define a single segment. Size of array is numPrimitives. - CUdeviceptr indexBuffer; - /// Stride between indices. If set to zero, indices are assumed to be tightly - /// packed and stride is sizeof( unsigned int ). - unsigned int indexStrideInBytes; - - /// Combination of OptixGeometryFlags describing the - /// primitive behavior. - unsigned int flag; - - /// Primitive index bias, applied in optixGetPrimitiveIndex(). - /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. 
- unsigned int primitiveIndexOffset; - - /// End cap flags, see OptixCurveEndcapFlags - unsigned int endcapFlags; -} OptixBuildInputCurveArray; - -/// Sphere inputs -/// -/// A sphere is defined by a center point and a radius. -/// Each center point is represented by a vertex in the vertex buffer. -/// There is either a single radius for all spheres, or the radii are represented by entries in the radius buffer. -/// -/// The vertex buffers and radius buffers point to a host array of device pointers, one per motion step. -/// Host array size must match the number of motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set -/// to 0 or 1). Each per motion key device pointer must point to an array of vertices corresponding to the center points of the spheres, or -/// an array of 1 or N radii. Format OPTIX_VERTEX_FORMAT_FLOAT3 is used for vertices, OPTIX_VERTEX_FORMAT_FLOAT for radii. -/// -/// \see #OptixBuildInput::sphereArray -typedef struct OptixBuildInputSphereArray -{ - /// Pointer to host array of device pointers, one per motion step. Host array size must match number of - /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set - /// to 1). Each per-motion-key device pointer must point to an array of floats (the center points of - /// the spheres). - const CUdeviceptr* vertexBuffers; - - /// Stride between vertices. If set to zero, vertices are assumed to be tightly - /// packed and stride is sizeof( float3 ). - unsigned int vertexStrideInBytes; - /// Number of vertices in each buffer in vertexBuffers. - unsigned int numVertices; - - /// Parallel to vertexBuffers: a device pointer per motion step, each with numRadii float values, - /// specifying the sphere radius corresponding to each vertex. - const CUdeviceptr* radiusBuffers; - /// Stride between radii. If set to zero, widths are assumed to be tightly - /// packed and stride is sizeof( float ). 
- unsigned int radiusStrideInBytes; - /// Boolean value indicating whether a single radius per radius buffer is used, - /// or the number of radii in radiusBuffers equals numVertices. - int singleRadius; - - /// Array of flags, to specify flags per sbt record, - /// combinations of OptixGeometryFlags describing the - /// primitive behavior, size must match numSbtRecords - const unsigned int* flags; - - /// Number of sbt records available to the sbt index offset override. - unsigned int numSbtRecords; - /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. - /// Every entry must be in range [0,numSbtRecords-1]. - /// Size needs to be the number of primitives. - CUdeviceptr sbtIndexOffsetBuffer; - /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). - unsigned int sbtIndexOffsetSizeInBytes; - /// Stride between the sbt index offsets. If set to zero, the offsets are assumed to be tightly - /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). - unsigned int sbtIndexOffsetStrideInBytes; - - /// Primitive index bias, applied in optixGetPrimitiveIndex(). - /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. - unsigned int primitiveIndexOffset; -} OptixBuildInputSphereArray; - -/// AABB inputs -typedef struct OptixAabb -{ - float minX; ///< Lower extent in X direction. - float minY; ///< Lower extent in Y direction. - float minZ; ///< Lower extent in Z direction. - float maxX; ///< Upper extent in X direction. - float maxY; ///< Upper extent in Y direction. - float maxZ; ///< Upper extent in Z direction. -} OptixAabb; - -/// Custom primitive inputs -/// -/// \see #OptixBuildInput::customPrimitiveArray -typedef struct OptixBuildInputCustomPrimitiveArray -{ - /// Points to host array of device pointers to AABBs (type OptixAabb), one per motion step. 
- /// Host array size must match number of motion keys as set in OptixMotionOptions (or an array of size 1 - /// if OptixMotionOptions::numKeys is set to 1). - /// Each device pointer must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. - const CUdeviceptr* aabbBuffers; - - /// Number of primitives in each buffer (i.e., per motion step) in - /// #OptixBuildInputCustomPrimitiveArray::aabbBuffers. - unsigned int numPrimitives; - - /// Stride between AABBs (per motion key). If set to zero, the aabbs are assumed to be tightly - /// packed and the stride is assumed to be sizeof( OptixAabb ). - /// If non-zero, the value must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. - unsigned int strideInBytes; - - /// Array of flags, to specify flags per sbt record, - /// combinations of OptixGeometryFlags describing the - /// primitive behavior, size must match numSbtRecords - const unsigned int* flags; - - /// Number of sbt records available to the sbt index offset override. - unsigned int numSbtRecords; - - /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. - /// Every entry must be in range [0,numSbtRecords-1]. - /// Size needs to be the number of primitives. - CUdeviceptr sbtIndexOffsetBuffer; - - /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). - unsigned int sbtIndexOffsetSizeInBytes; - - /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly - /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). - unsigned int sbtIndexOffsetStrideInBytes; - - /// Primitive index bias, applied in optixGetPrimitiveIndex(). - /// Sum of primitiveIndexOffset and number of primitive must not overflow 32bits. 
- unsigned int primitiveIndexOffset; -} OptixBuildInputCustomPrimitiveArray; - -/// Instance and instance pointer inputs -/// -/// \see #OptixBuildInput::instanceArray -typedef struct OptixBuildInputInstanceArray -{ - /// If OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS instances and - /// aabbs should be interpreted as arrays of pointers instead of arrays of structs. - /// - /// This pointer must be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if - /// OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCES. The array elements must - /// be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if OptixBuildInput::type is - /// OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS. - CUdeviceptr instances; - - /// Number of elements in #OptixBuildInputInstanceArray::instances. - unsigned int numInstances; - - /// Only valid for OPTIX_BUILD_INPUT_TYPE_INSTANCE - /// Defines the stride between instances. A stride of 0 indicates a tight packing, i.e., - /// stride = sizeof( OptixInstance ) - unsigned int instanceStride; -} OptixBuildInputInstanceArray; - -/// Instance and instance pointer inputs -/// -/// \see #OptixRelocateInput::instanceArray -typedef struct OptixRelocateInputInstanceArray -{ - /// Number of elements in #OptixRelocateInputInstanceArray::traversableHandles. - /// Must match #OptixBuildInputInstanceArray::numInstances of the source build input. - unsigned int numInstances; - - /// These are the traversable handles of the instances (See OptixInstance::traversableHandle) - /// These can be used when also relocating the instances. No updates to - /// the bounds are performed. Use optixAccelBuild to update the bounds. - /// 'traversableHandles' may be zero when the traversables are not relocated - /// (i.e. relocation of an IAS on the source device). - CUdeviceptr traversableHandles; - -} OptixRelocateInputInstanceArray; - -/// Enum to distinguish the different build input types. 
-/// -/// \see #OptixBuildInput::type -typedef enum OptixBuildInputType -{ - /// Triangle inputs. \see #OptixBuildInputTriangleArray - OPTIX_BUILD_INPUT_TYPE_TRIANGLES = 0x2141, - /// Custom primitive inputs. \see #OptixBuildInputCustomPrimitiveArray - OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES = 0x2142, - /// Instance inputs. \see #OptixBuildInputInstanceArray - OPTIX_BUILD_INPUT_TYPE_INSTANCES = 0x2143, - /// Instance pointer inputs. \see #OptixBuildInputInstanceArray - OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS = 0x2144, - /// Curve inputs. \see #OptixBuildInputCurveArray - OPTIX_BUILD_INPUT_TYPE_CURVES = 0x2145, - /// Sphere inputs. \see #OptixBuildInputSphereArray - OPTIX_BUILD_INPUT_TYPE_SPHERES = 0x2146 -} OptixBuildInputType; - -/// Build inputs. -/// -/// All of them support motion and the size of the data arrays needs to match the number of motion steps -/// -/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() -typedef struct OptixBuildInput -{ - /// The type of the build input. - OptixBuildInputType type; - - union - { - /// Triangle inputs. - OptixBuildInputTriangleArray triangleArray; - /// Curve inputs. - OptixBuildInputCurveArray curveArray; - /// Sphere inputs. - OptixBuildInputSphereArray sphereArray; - /// Custom primitive inputs. - OptixBuildInputCustomPrimitiveArray customPrimitiveArray; - /// Instance and instance pointer inputs. - OptixBuildInputInstanceArray instanceArray; - char pad[1024]; - }; -} OptixBuildInput; - -/// Relocation inputs. -/// -/// \see #optixAccelRelocate() -typedef struct OptixRelocateInput -{ - /// The type of the build input to relocate. - OptixBuildInputType type; - - union - { - /// Instance and instance pointer inputs. - OptixRelocateInputInstanceArray instanceArray; - - /// Triangle inputs. - OptixRelocateInputTriangleArray triangleArray; - - /// Inputs of any of the other types don't require any relocation data. - }; -} OptixRelocateInput; - -// Some 32-bit tools use this header. 
This static_assert fails for them because -// the default enum size is 4 bytes, rather than 8, under 32-bit compilers. -// This #ifndef allows them to disable the static assert. - -// TODO Define a static assert for C/pre-C++-11 -#if defined( __cplusplus ) && __cplusplus >= 201103L -static_assert( sizeof( OptixBuildInput ) == 8 + 1024, "OptixBuildInput has wrong size" ); -#endif - -/// Flags set on the #OptixInstance::flags. -/// -/// These can be or'ed together to combine multiple flags. -typedef enum OptixInstanceFlags -{ - /// No special flag set - OPTIX_INSTANCE_FLAG_NONE = 0, - - /// Prevent triangles from getting culled due to their orientation. - /// Effectively ignores ray flags - /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. - OPTIX_INSTANCE_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 0, - - /// Flip triangle orientation. - /// This affects front/backface culling as well as the reported face in case of a hit. - OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING = 1u << 1, - - /// Disable anyhit programs for all geometries of the instance. - /// Can be overridden by OPTIX_RAY_FLAG_ENFORCE_ANYHIT. - /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. - OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT = 1u << 2, - - /// Enables anyhit programs for all geometries of the instance. - /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT - /// Can be overridden by OPTIX_RAY_FLAG_DISABLE_ANYHIT. - /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. - OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT = 1u << 3, - - - /// Force 4-state opacity micromaps to behave as 2-state opacity micromaps during traversal. - OPTIX_INSTANCE_FLAG_FORCE_OPACITY_MICROMAP_2_STATE = 1u << 4, - /// Don't perform opacity micromap query for this instance. GAS must be built with ALLOW_DISABLE_OPACITY_MICROMAPS for this to be valid. - /// This flag overrides FORCE_OPACTIY_MIXROMAP_2_STATE instance and ray flags. 
- OPTIX_INSTANCE_FLAG_DISABLE_OPACITY_MICROMAPS = 1u << 5, - -} OptixInstanceFlags; - -/// Instances -/// -/// \see #OptixBuildInputInstanceArray::instances -typedef struct OptixInstance -{ - /// affine object-to-world transformation as 3x4 matrix in row-major layout - float transform[12]; - - /// Application supplied ID. The maximal ID can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID. - unsigned int instanceId; - - /// SBT record offset. Will only be used for instances of geometry acceleration structure (GAS) objects. - /// Needs to be set to 0 for instances of instance acceleration structure (IAS) objects. The maximal SBT offset - /// can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_SBT_OFFSET. - unsigned int sbtOffset; - - /// Visibility mask. If rayMask & instanceMask == 0 the instance is culled. The number of available bits can be - /// queried using OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK. - unsigned int visibilityMask; - - /// Any combination of OptixInstanceFlags is allowed. - unsigned int flags; - - /// Set with an OptixTraversableHandle. - OptixTraversableHandle traversableHandle; - - /// round up to 80-byte, to ensure 16-byte alignment - unsigned int pad[2]; -} OptixInstance; - -/// Builder Options -/// -/// Used for #OptixAccelBuildOptions::buildFlags. Can be or'ed together. -typedef enum OptixBuildFlags -{ - /// No special flags set. - OPTIX_BUILD_FLAG_NONE = 0, - - /// Allow updating the build with new vertex positions with subsequent calls to - /// optixAccelBuild. 
- OPTIX_BUILD_FLAG_ALLOW_UPDATE = 1u << 0, - - OPTIX_BUILD_FLAG_ALLOW_COMPACTION = 1u << 1, - - OPTIX_BUILD_FLAG_PREFER_FAST_TRACE = 1u << 2, - - OPTIX_BUILD_FLAG_PREFER_FAST_BUILD = 1u << 3, - - /// Allow random access to build input vertices - /// See optixGetTriangleVertexData - /// optixGetLinearCurveVertexData - /// optixGetQuadraticBSplineVertexData - /// optixGetCubicBSplineVertexData - /// optixGetCatmullRomVertexData - /// optixGetSphereData - OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS = 1u << 4, - - /// Allow random access to instances - /// See optixGetInstanceTraversableFromIAS - OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS = 1u << 5, - - /// Support updating the opacity micromap array and opacity micromap indices on refits. - /// May increase AS size and may have a small negative impact on traversal performance. - /// If this flag is absent, all opacity micromap inputs must remain unchanged between the initial AS builds and their subsequent refits. - OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE = 1u << 6, - - /// If enabled, any instances referencing this GAS are allowed to disable the opacity micromap test through the DISABLE_OPACITY_MICROMAPS flag instance flag. - /// Note that the GAS will not be optimized for the attached opacity micromap Arrays if this flag is set, - /// which may result in reduced traversal performance. - OPTIX_BUILD_FLAG_ALLOW_DISABLE_OPACITY_MICROMAPS = 1u << 7, -} OptixBuildFlags; - - -/// Flags defining behavior of opacity micromaps in a opacity micromap array. -typedef enum OptixOpacityMicromapFlags -{ - OPTIX_OPACITY_MICROMAP_FLAG_NONE = 0, - OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE = 1 << 0, - OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD = 1 << 1, -} OptixOpacityMicromapFlags; - -/// Opacity micromap descriptor. 
-typedef struct OptixOpacityMicromapDesc -{ - /// Byte offset to opacity micromap in data input buffer of opacity micromap array build - unsigned int byteOffset; - /// Number of micro-triangles is 4^level. Valid levels are [0, 12] - unsigned short subdivisionLevel; - /// OptixOpacityMicromapFormat - unsigned short format; -} OptixOpacityMicromapDesc; - -/// Opacity micromap histogram entry. -/// Specifies how many opacity micromaps of a specific type are input to the opacity micromap array build. -/// Note that while this is similar to OptixOpacityMicromapUsageCount, the histogram entry specifies how many opacity micromaps -/// of a specific type are combined into a opacity micromap array. -typedef struct OptixOpacityMicromapHistogramEntry -{ - /// Number of opacity micromaps with the format and subdivision level that are input to the opacity micromap array build. - unsigned int count; - /// Number of micro-triangles is 4^level. Valid levels are [0, 12]. - unsigned int subdivisionLevel; - /// opacity micromap format. - OptixOpacityMicromapFormat format; -} OptixOpacityMicromapHistogramEntry; - -/// Inputs to opacity micromap array construction. -typedef struct OptixOpacityMicromapArrayBuildInput -{ - /// Applies to all opacity micromaps in array. - OptixOpacityMicromapFlags flags; - - /// 128B aligned base pointer for raw opacity micromap input data. - CUdeviceptr inputBuffer; - - /// One OptixOpacityMicromapDesc entry per opacity micromap. - CUdeviceptr perMicromapDescBuffer; - - /// Stride between OptixOpacityMicromapDescs in perOmDescBuffer. - /// If set to zero, the opacity micromap descriptors are assumed to be tightly packed and the stride is assumed to be sizeof( OptixOpacityMicromapDesc ). - unsigned int perMicromapDescStrideInBytes; - - /// Number of OptixOpacityMicromapHistogramEntry. - unsigned int numMicromapHistogramEntries; - /// Histogram over opacity micromaps of input format and subdivision combinations. 
- /// Counts of entries with equal format and subdivision combination (duplicates) are added together. - const OptixOpacityMicromapHistogramEntry* micromapHistogramEntries; -} OptixOpacityMicromapArrayBuildInput; - - -/// Conservative memory requirements for building a opacity micromap array -typedef struct OptixMicromapBufferSizes -{ - size_t outputSizeInBytes; - size_t tempSizeInBytes; -} OptixMicromapBufferSizes; - -/// Buffer inputs for opacity micromap array builds. -typedef struct OptixMicromapBuffers -{ - /// Output buffer - CUdeviceptr output; - /// Output buffer size - size_t outputSizeInBytes; - /// Temp buffer - CUdeviceptr temp; - /// Temp buffer size - size_t tempSizeInBytes; -} OptixMicromapBuffers; - - - -/// Enum to specify the acceleration build operation. -/// -/// Used in OptixAccelBuildOptions, which is then passed to optixAccelBuild and -/// optixAccelComputeMemoryUsage, this enum indicates whether to do a build or an update -/// of the acceleration structure. -/// -/// Acceleration structure updates utilize the same acceleration structure, but with -/// updated bounds. Updates are typically much faster than builds, however, large -/// perturbations can degrade the quality of the acceleration structure. -/// -/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild(), #OptixAccelBuildOptions -typedef enum OptixBuildOperation -{ - /// Perform a full build operation - OPTIX_BUILD_OPERATION_BUILD = 0x2161, - /// Perform an update using new bounds - OPTIX_BUILD_OPERATION_UPDATE = 0x2162, -} OptixBuildOperation; - -/// Enum to specify motion flags. -/// -/// \see #OptixMotionOptions::flags. 
-typedef enum OptixMotionFlags -{ - OPTIX_MOTION_FLAG_NONE = 0, - OPTIX_MOTION_FLAG_START_VANISH = 1u << 0, - OPTIX_MOTION_FLAG_END_VANISH = 1u << 1 -} OptixMotionFlags; - -/// Motion options -/// -/// \see #OptixAccelBuildOptions::motionOptions, #OptixMatrixMotionTransform::motionOptions, -/// #OptixSRTMotionTransform::motionOptions -typedef struct OptixMotionOptions -{ - /// If numKeys > 1, motion is enabled. timeBegin, - /// timeEnd and flags are all ignored when motion is disabled. - unsigned short numKeys; - - /// Combinations of #OptixMotionFlags - unsigned short flags; - - /// Point in time where motion starts. Must be lesser than timeEnd. - float timeBegin; - - /// Point in time where motion ends. Must be greater than timeBegin. - float timeEnd; -} OptixMotionOptions; - -/// Build options for acceleration structures. -/// -/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() -typedef struct OptixAccelBuildOptions -{ - /// Combinations of OptixBuildFlags - unsigned int buildFlags; - - /// If OPTIX_BUILD_OPERATION_UPDATE the output buffer is assumed to contain the result - /// of a full build with OPTIX_BUILD_FLAG_ALLOW_UPDATE set and using the same number of - /// primitives. It is updated incrementally to reflect the current position of the - /// primitives. - /// If a BLAS has been built with OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE, new opacity micromap arrays - /// and opacity micromap indices may be provided to the refit. - OptixBuildOperation operation; - - /// Options for motion. - OptixMotionOptions motionOptions; -} OptixAccelBuildOptions; - -/// Struct for querying builder allocation requirements. -/// -/// Once queried the sizes should be used to allocate device memory of at least these sizes. -/// -/// \see #optixAccelComputeMemoryUsage() -typedef struct OptixAccelBufferSizes -{ - /// The size in bytes required for the outputBuffer parameter to optixAccelBuild when - /// doing a build (OPTIX_BUILD_OPERATION_BUILD). 
- size_t outputSizeInBytes; - - /// The size in bytes required for the tempBuffer paramter to optixAccelBuild when - /// doing a build (OPTIX_BUILD_OPERATION_BUILD). - size_t tempSizeInBytes; - - /// The size in bytes required for the tempBuffer parameter to optixAccelBuild - /// when doing an update (OPTIX_BUILD_OPERATION_UPDATE). This value can be different - /// than tempSizeInBytes used for a full build. Only non-zero if - /// OPTIX_BUILD_FLAG_ALLOW_UPDATE flag is set in OptixAccelBuildOptions. - size_t tempUpdateSizeInBytes; -} OptixAccelBufferSizes; - -/// Properties which can be emitted during acceleration structure build. -/// -/// \see #OptixAccelEmitDesc::type. -typedef enum OptixAccelPropertyType -{ - /// Size of a compacted acceleration structure. The device pointer points to a uint64. - OPTIX_PROPERTY_TYPE_COMPACTED_SIZE = 0x2181, - - /// OptixAabb * numMotionSteps - OPTIX_PROPERTY_TYPE_AABBS = 0x2182, -} OptixAccelPropertyType; - -/// Specifies a type and output destination for emitted post-build properties. -/// -/// \see #optixAccelBuild() -typedef struct OptixAccelEmitDesc -{ - /// Output buffer for the properties - CUdeviceptr result; - - /// Requested property - OptixAccelPropertyType type; -} OptixAccelEmitDesc; - -/// Used to store information related to relocation of optix data structures. -/// -/// \see #optixOpacityMicromapArrayGetRelocationInfo(), #optixOpacityMicromapArrayRelocate(), -/// #optixAccelGetRelocationInfo(), #optixAccelRelocate(), #optixCheckRelocationCompatibility() -typedef struct OptixRelocationInfo -{ - /// Opaque data, used internally, should not be modified - unsigned long long info[4]; -} OptixRelocationInfo; - -/// Static transform -/// -/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. 
-/// -/// \see #optixConvertPointerToTraversableHandle() -typedef struct OptixStaticTransform -{ - /// The traversable transformed by this transformation - OptixTraversableHandle child; - - /// Padding to make the transformations 16 byte aligned - unsigned int pad[2]; - - /// Affine object-to-world transformation as 3x4 matrix in row-major layout - float transform[12]; - - /// Affine world-to-object transformation as 3x4 matrix in row-major layout - /// Must be the inverse of the transform matrix - float invTransform[12]; -} OptixStaticTransform; - -/// Represents a matrix motion transformation. -/// -/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. -/// -/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its transform member. -/// The following example shows how to create instances for an arbitrary number N of motion keys: -/// -/// \code -/// float matrixData[N][12]; -/// ... // setup matrixData -/// -/// size_t transformSizeInBytes = sizeof( OptixMatrixMotionTransform ) + ( N-2 ) * 12 * sizeof( float ); -/// OptixMatrixMotionTransform* matrixMoptionTransform = (OptixMatrixMotionTransform*) malloc( transformSizeInBytes ); -/// memset( matrixMoptionTransform, 0, transformSizeInBytes ); -/// -/// ... // setup other members of matrixMoptionTransform -/// matrixMoptionTransform->motionOptions.numKeys/// = N; -/// memcpy( matrixMoptionTransform->transform, matrixData, N * 12 * sizeof( float ) ); -/// -/// ... // copy matrixMoptionTransform to device memory -/// free( matrixMoptionTransform ) -/// \endcode -/// -/// \see #optixConvertPointerToTraversableHandle() -typedef struct OptixMatrixMotionTransform -{ - /// The traversable that is transformed by this transformation - OptixTraversableHandle child; - - /// The motion options for this transformation. - /// Must have at least two motion keys. 
- OptixMotionOptions motionOptions; - - /// Padding to make the transformation 16 byte aligned - unsigned int pad[3]; - - /// Affine object-to-world transformation as 3x4 matrix in row-major layout - float transform[2][12]; -} OptixMatrixMotionTransform; - -/// Represents an SRT transformation. -/// -/// An SRT transformation can represent a smooth rotation with fewer motion keys than a matrix transformation. Each -/// motion key is constructed from elements taken from a matrix S, a quaternion R, and a translation T. -/// -/// The scaling matrix -/// \f$S = \begin{bmatrix} sx & a & b & pvx \\ 0 & sy & c & pvy \\ 0 & 0 & sz & pvz \end{bmatrix}\f$ -// [ sx a b pvx ] -// S = [ 0 sy c pvy ] -// [ 0 0 sz pvz ] -/// defines an affine transformation that can include scale, shear, and a translation. -/// The translation allows to define the pivot point for the subsequent rotation. -/// -/// The quaternion R = [ qx, qy, qz, qw ] describes a rotation with angular component qw = cos(theta/2) and other -/// components [ qx, qy, qz ] = sin(theta/2) * [ ax, ay, az ] where the axis [ ax, ay, az ] is normalized. -/// -/// The translation matrix -/// \f$T = \begin{bmatrix} 1 & 0 & 0 & tx \\ 0 & 1 & 0 & ty \\ 0 & 0 & 1 & tz \end{bmatrix}\f$ -// [ 1 0 0 tx ] -// T = [ 0 1 0 ty ] -// [ 0 0 1 tz ] -/// defines another translation that is applied after the rotation. Typically, this translation includes -/// the inverse translation from the matrix S to reverse the translation for the pivot point for R. -/// -/// To obtain the effective transformation at time t, the elements of the components of S, R, and T will be interpolated -/// linearly. The components are then multiplied to obtain the combined transformation C = T * R * S. The transformation -/// C is the effective object-to-world transformations at time t, and C^(-1) is the effective world-to-object -/// transformation at time t. 
-/// -/// \see #OptixSRTMotionTransform::srtData, #optixConvertPointerToTraversableHandle() -typedef struct OptixSRTData -{ - /// \name Parameters describing the SRT transformation - /// @{ - float sx, a, b, pvx, sy, c, pvy, sz, pvz, qx, qy, qz, qw, tx, ty, tz; - /// @} -} OptixSRTData; - -// TODO Define a static assert for C/pre-C++-11 -#if defined( __cplusplus ) && __cplusplus >= 201103L -static_assert( sizeof( OptixSRTData ) == 16 * 4, "OptixSRTData has wrong size" ); -#endif - -/// Represents an SRT motion transformation. -/// -/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. -/// -/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its srtData member. -/// The following example shows how to create instances for an arbitrary number N of motion keys: -/// -/// \code -/// OptixSRTData srtData[N]; -/// ... // setup srtData -/// -/// size_t transformSizeInBytes = sizeof( OptixSRTMotionTransform ) + ( N-2 ) * sizeof( OptixSRTData ); -/// OptixSRTMotionTransform* srtMotionTransform = (OptixSRTMotionTransform*) malloc( transformSizeInBytes ); -/// memset( srtMotionTransform, 0, transformSizeInBytes ); -/// -/// ... // setup other members of srtMotionTransform -/// srtMotionTransform->motionOptions.numKeys = N; -/// memcpy( srtMotionTransform->srtData, srtData, N * sizeof( OptixSRTData ) ); -/// -/// ... // copy srtMotionTransform to device memory -/// free( srtMotionTransform ) -/// \endcode -/// -/// \see #optixConvertPointerToTraversableHandle() -typedef struct OptixSRTMotionTransform -{ - /// The traversable transformed by this transformation - OptixTraversableHandle child; - - /// The motion options for this transformation - /// Must have at least two motion keys. 
- OptixMotionOptions motionOptions; - - /// Padding to make the SRT data 16 byte aligned - unsigned int pad[3]; - - /// The actual SRT data describing the transformation - OptixSRTData srtData[2]; -} OptixSRTMotionTransform; - -// TODO Define a static assert for C/pre-C++-11 -#if defined( __cplusplus ) && __cplusplus >= 201103L -static_assert( sizeof( OptixSRTMotionTransform ) == 8 + 12 + 12 + 2 * 16 * 4, "OptixSRTMotionTransform has wrong size" ); -#endif - -/// Traversable Handles -/// -/// \see #optixConvertPointerToTraversableHandle() -typedef enum OptixTraversableType -{ - /// Static transforms. \see #OptixStaticTransform - OPTIX_TRAVERSABLE_TYPE_STATIC_TRANSFORM = 0x21C1, - /// Matrix motion transform. \see #OptixMatrixMotionTransform - OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM = 0x21C2, - /// SRT motion transform. \see #OptixSRTMotionTransform - OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM = 0x21C3, -} OptixTraversableType; - -/// Pixel formats used by the denoiser. -/// -/// \see #OptixImage2D::format -typedef enum OptixPixelFormat -{ - OPTIX_PIXEL_FORMAT_HALF2 = 0x2207, ///< two halfs, XY - OPTIX_PIXEL_FORMAT_HALF3 = 0x2201, ///< three halfs, RGB - OPTIX_PIXEL_FORMAT_HALF4 = 0x2202, ///< four halfs, RGBA - OPTIX_PIXEL_FORMAT_FLOAT2 = 0x2208, ///< two floats, XY - OPTIX_PIXEL_FORMAT_FLOAT3 = 0x2203, ///< three floats, RGB - OPTIX_PIXEL_FORMAT_FLOAT4 = 0x2204, ///< four floats, RGBA - OPTIX_PIXEL_FORMAT_UCHAR3 = 0x2205, ///< three unsigned chars, RGB - OPTIX_PIXEL_FORMAT_UCHAR4 = 0x2206, ///< four unsigned chars, RGBA - OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER = 0x2209, ///< internal format -} OptixPixelFormat; - -/// Image descriptor used by the denoiser. -/// -/// \see #optixDenoiserInvoke(), #optixDenoiserComputeIntensity() -typedef struct OptixImage2D -{ - /// Pointer to the actual pixel data. 
- CUdeviceptr data; - /// Width of the image (in pixels) - unsigned int width; - /// Height of the image (in pixels) - unsigned int height; - /// Stride between subsequent rows of the image (in bytes). - unsigned int rowStrideInBytes; - /// Stride between subsequent pixels of the image (in bytes). - /// If set to 0, dense packing (no gaps) is assumed. - /// For pixel format OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER it must be set to - /// at least OptixDenoiserSizes::internalGuideLayerSizeInBytes. - unsigned int pixelStrideInBytes; - /// Pixel format. - OptixPixelFormat format; -} OptixImage2D; - -/// Model kind used by the denoiser. -/// -/// \see #optixDenoiserCreate -typedef enum OptixDenoiserModelKind -{ - /// Use the built-in model appropriate for low dynamic range input. - OPTIX_DENOISER_MODEL_KIND_LDR = 0x2322, - - /// Use the built-in model appropriate for high dynamic range input. - OPTIX_DENOISER_MODEL_KIND_HDR = 0x2323, - - /// Use the built-in model appropriate for high dynamic range input and support for AOVs - OPTIX_DENOISER_MODEL_KIND_AOV = 0x2324, - - /// Use the built-in model appropriate for high dynamic range input, temporally stable - OPTIX_DENOISER_MODEL_KIND_TEMPORAL = 0x2325, - - /// Use the built-in model appropriate for high dynamic range input and support for AOVs, temporally stable - OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV = 0x2326, - - /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x - OPTIX_DENOISER_MODEL_KIND_UPSCALE2X = 0x2327, - - /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x, - /// temporally stable - OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X = 0x2328, -} OptixDenoiserModelKind; - -/// Options used by the denoiser -/// -/// \see #optixDenoiserCreate() -typedef struct OptixDenoiserOptions -{ - // if nonzero, albedo image must be given in OptixDenoiserGuideLayer - unsigned int guideAlbedo; - - // if nonzero, normal image must 
be given in OptixDenoiserGuideLayer - unsigned int guideNormal; -} OptixDenoiserOptions; - -/// Guide layer for the denoiser -/// -/// \see #optixDenoiserInvoke() -typedef struct OptixDenoiserGuideLayer -{ - // albedo/bsdf image - OptixImage2D albedo; - - // normal vector image (2d or 3d pixel format) - OptixImage2D normal; - - // 2d flow image, pixel flow from previous to current frame for each pixel - OptixImage2D flow; - - OptixImage2D previousOutputInternalGuideLayer; - OptixImage2D outputInternalGuideLayer; -} OptixDenoiserGuideLayer; - -/// Input/Output layers for the denoiser -/// -/// \see #optixDenoiserInvoke() -typedef struct OptixDenoiserLayer -{ - // input image (beauty or AOV) - OptixImage2D input; - - // denoised output image from previous frame if temporal model kind selected - OptixImage2D previousOutput; - - // denoised output for given input - OptixImage2D output; -} OptixDenoiserLayer; - -/// Various parameters used by the denoiser -/// -/// \see #optixDenoiserInvoke() -/// \see #optixDenoiserComputeIntensity() -/// \see #optixDenoiserComputeAverageColor() -typedef enum OptixDenoiserAlphaMode -{ - /// Copy alpha (if present) from input layer, no denoising. - OPTIX_DENOISER_ALPHA_MODE_COPY = 0, - - /// Denoise alpha separately. With AOV model kinds, treat alpha like an AOV. - OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV = 1, - - /// With AOV model kinds, full denoise pass with alpha. - /// This is slower than OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV. - OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS = 2 -} OptixDenoiserAlphaMode; -typedef struct OptixDenoiserParams -{ - /// alpha denoise mode - OptixDenoiserAlphaMode denoiseAlpha; - - /// average log intensity of input image (default null pointer). points to a single float. - /// with the default (null pointer) denoised results will not be optimal for very dark or - /// bright input images. - CUdeviceptr hdrIntensity; - - /// blend factor. - /// If set to 0 the output is 100% of the denoised input. 
If set to 1, the output is 100% of - /// the unmodified input. Values between 0 and 1 will linearly interpolate between the denoised - /// and unmodified input. - float blendFactor; - - /// this parameter is used when the OPTIX_DENOISER_MODEL_KIND_AOV model kind is set. - /// average log color of input image, separate for RGB channels (default null pointer). - /// points to three floats. with the default (null pointer) denoised results will not be - /// optimal. - CUdeviceptr hdrAverageColor; - - /// In temporal modes this parameter must be set to 1 if previous layers (e.g. - /// previousOutputInternalGuideLayer) contain valid data. This is the case in the - /// second and subsequent frames of a sequence (for example after a change of camera - /// angle). In the first frame of such a sequence this parameter must be set to 0. - unsigned int temporalModeUsePreviousLayers; -} OptixDenoiserParams; - -/// Various sizes related to the denoiser. -/// -/// \see #optixDenoiserComputeMemoryResources() -typedef struct OptixDenoiserSizes -{ - /// Size of state memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. - size_t stateSizeInBytes; - - /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. - /// Overlap added to dimensions passed to #optixDenoiserComputeMemoryResources. - size_t withOverlapScratchSizeInBytes; - - /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. - /// No overlap added. - size_t withoutOverlapScratchSizeInBytes; - - /// Overlap on all four tile sides. - unsigned int overlapWindowSizeInPixels; - - /// Size of scratch memory passed to #optixDenoiserComputeAverageColor. - /// The size is independent of the tile/image resolution. - size_t computeAverageColorSizeInBytes; - - /// Size of scratch memory passed to #optixDenoiserComputeIntensity. - /// The size is independent of the tile/image resolution. 
- size_t computeIntensitySizeInBytes; - - /// Number of bytes for each pixel in internal guide layers. - size_t internalGuideLayerPixelSizeInBytes; -} OptixDenoiserSizes; - -/// Ray flags passed to the device function #optixTrace(). These affect the behavior of -/// traversal per invocation. -/// -/// \see #optixTrace() -typedef enum OptixRayFlags -{ - /// No change from the behavior configured for the individual AS. - OPTIX_RAY_FLAG_NONE = 0u, - - /// Disables anyhit programs for the ray. - /// Overrides OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. - /// This flag is mutually exclusive with OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. - OPTIX_RAY_FLAG_DISABLE_ANYHIT = 1u << 0, - - /// Forces anyhit program execution for the ray. - /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT as well as OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. - /// This flag is mutually exclusive with OPTIX_RAY_FLAG_DISABLE_ANYHIT, - /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. - OPTIX_RAY_FLAG_ENFORCE_ANYHIT = 1u << 1, - - /// Terminates the ray after the first hit and executes - /// the closesthit program of that hit. - OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT = 1u << 2, - - /// Disables closesthit programs for the ray, but still executes miss program in case of a miss. - OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT = 1u << 3, - - /// Do not intersect triangle back faces - /// (respects a possible face change due to instance flag - /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). - /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. - OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 1u << 4, - - /// Do not intersect triangle front faces - /// (respects a possible face change due to instance flag - /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). - /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES. 
- OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 1u << 5, - - /// Do not intersect geometry which disables anyhit programs - /// (due to setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or - /// instance flag OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT). - /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT, - /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. - OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT = 1u << 6, - - /// Do not intersect geometry which have an enabled anyhit program - /// (due to not setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or - /// setting instance flag OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT). - /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, - /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. - OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT = 1u << 7, - - /// Force 4-state opacity micromaps to behave as 2-state opactiy micromaps during traversal. - OPTIX_RAY_FLAG_FORCE_OPACITY_MICROMAP_2_STATE = 1u << 10, -} OptixRayFlags; - -/// Transform -/// -/// OptixTransformType is used by the device function #optixGetTransformTypeFromHandle() to -/// determine the type of the OptixTraversableHandle returned from -/// optixGetTransformListHandle(). -typedef enum OptixTransformType -{ - OPTIX_TRANSFORM_TYPE_NONE = 0, ///< Not a transformation - OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM = 1, ///< \see #OptixStaticTransform - OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM = 2, ///< \see #OptixMatrixMotionTransform - OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM = 3, ///< \see #OptixSRTMotionTransform - OPTIX_TRANSFORM_TYPE_INSTANCE = 4, ///< \see #OptixInstance -} OptixTransformType; - -/// Specifies the set of valid traversable graphs that may be -/// passed to invocation of #optixTrace(). Flags may be bitwise combined. -typedef enum OptixTraversableGraphFlags -{ - /// Used to signal that any traversable graphs is valid. - /// This flag is mutually exclusive with all other flags. 
- OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY = 0, - - /// Used to signal that a traversable graph of a single Geometry Acceleration - /// Structure (GAS) without any transforms is valid. This flag may be combined with - /// other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. - OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS = 1u << 0, - - /// Used to signal that a traversable graph of a single Instance Acceleration - /// Structure (IAS) directly connected to Geometry Acceleration Structure (GAS) - /// traversables without transform traversables in between is valid. This flag may - /// be combined with other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. - OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING = 1u << 1, -} OptixTraversableGraphFlags; - -/// Optimization levels -/// -/// \see #OptixModuleCompileOptions::optLevel -typedef enum OptixCompileOptimizationLevel -{ - /// Default is to run all optimizations - OPTIX_COMPILE_OPTIMIZATION_DEFAULT = 0, - /// No optimizations - OPTIX_COMPILE_OPTIMIZATION_LEVEL_0 = 0x2340, - /// Some optimizations - OPTIX_COMPILE_OPTIMIZATION_LEVEL_1 = 0x2341, - /// Most optimizations - OPTIX_COMPILE_OPTIMIZATION_LEVEL_2 = 0x2342, - /// All optimizations - OPTIX_COMPILE_OPTIMIZATION_LEVEL_3 = 0x2343, -} OptixCompileOptimizationLevel; - -/// Debug levels -/// -/// \see #OptixModuleCompileOptions::debugLevel -typedef enum OptixCompileDebugLevel -{ - /// Default currently is minimal - OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT = 0, - /// No debug information - OPTIX_COMPILE_DEBUG_LEVEL_NONE = 0x2350, - /// Generate information that does not impact performance. - /// Note this replaces OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO. - OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL = 0x2351, - /// Generate some debug information with slight performance cost - OPTIX_COMPILE_DEBUG_LEVEL_MODERATE = 0x2353, - /// Generate full debug information - OPTIX_COMPILE_DEBUG_LEVEL_FULL = 0x2352, -} OptixCompileDebugLevel; - -/// Module compilation state. 
-/// -/// \see #optixModuleGetCompilationState(), #optixModuleCreateFromPTXWithTasks() -typedef enum OptixModuleCompileState -{ - /// No OptixTask objects have started - OPTIX_MODULE_COMPILE_STATE_NOT_STARTED = 0x2360, - - /// Started, but not all OptixTask objects have completed. No detected failures. - OPTIX_MODULE_COMPILE_STATE_STARTED = 0x2361, - - /// Not all OptixTask objects have completed, but at least one has failed. - OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE = 0x2362, - - /// All OptixTask objects have completed, and at least one has failed - OPTIX_MODULE_COMPILE_STATE_FAILED = 0x2363, - - /// All OptixTask objects have completed. The OptixModule is ready to be used. - OPTIX_MODULE_COMPILE_STATE_COMPLETED = 0x2364, -} OptixModuleCompileState; - - - -/// Struct for specifying specializations for pipelineParams as specified in -/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. -/// -/// The bound values are supposed to represent a constant value in the -/// pipelineParams. OptiX will attempt to locate all loads from the pipelineParams and -/// correlate them to the appropriate bound value, but there are cases where OptiX cannot -/// safely or reliably do this. For example if the pointer to the pipelineParams is passed -/// as an argument to a non-inline function or the offset of the load to the -/// pipelineParams cannot be statically determined (e.g. accessed in a loop). No module -/// should rely on the value being specialized in order to work correctly. The values in -/// the pipelineParams specified on optixLaunch should match the bound value. If -/// validation mode is enabled on the context, OptiX will verify that the bound values -/// specified matches the values in pipelineParams specified to optixLaunch. -/// -/// These values are compiled in to the module as constants. 
Once the constants are -/// inserted into the code, an optimization pass will be run that will attempt to -/// propagate the consants and remove unreachable code. -/// -/// If caching is enabled, changes in these values will result in newly compiled modules. -/// -/// The pipelineParamOffset and sizeInBytes must be within the bounds of the -/// pipelineParams variable. OPTIX_ERROR_INVALID_VALUE will be returned from -/// optixModuleCreateFromPTX otherwise. -/// -/// If more than one bound value overlaps or the size of a bound value is equal to 0, -/// an OPTIX_ERROR_INVALID_VALUE will be returned from optixModuleCreateFromPTX. -/// -/// The same set of bound values do not need to be used for all modules in a pipeline, but -/// overlapping values between modules must have the same value. -/// OPTIX_ERROR_INVALID_VALUE will be returned from optixPipelineCreate otherwise. -/// -/// \see #OptixModuleCompileOptions -typedef struct OptixModuleCompileBoundValueEntry { - size_t pipelineParamOffsetInBytes; - size_t sizeInBytes; - const void* boundValuePtr; - const char* annotation; // optional string to display, set to 0 if unused. If unused, - // OptiX will report the annotation as "No annotation" -} OptixModuleCompileBoundValueEntry; - -/// Payload type identifiers. -typedef enum OptixPayloadTypeID { - OPTIX_PAYLOAD_TYPE_DEFAULT = 0, - OPTIX_PAYLOAD_TYPE_ID_0 = (1 << 0u), - OPTIX_PAYLOAD_TYPE_ID_1 = (1 << 1u), - OPTIX_PAYLOAD_TYPE_ID_2 = (1 << 2u), - OPTIX_PAYLOAD_TYPE_ID_3 = (1 << 3u), - OPTIX_PAYLOAD_TYPE_ID_4 = (1 << 4u), - OPTIX_PAYLOAD_TYPE_ID_5 = (1 << 5u), - OPTIX_PAYLOAD_TYPE_ID_6 = (1 << 6u), - OPTIX_PAYLOAD_TYPE_ID_7 = (1 << 7u) -} OptixPayloadTypeID; - -/// Semantic flags for a single payload word. -/// -/// Used to specify the semantics of a payload word per shader type. -/// "read": Shader of this type may read the payload word. -/// "write": Shader of this type may write the payload word. 
-/// -/// "trace_caller_write": Shaders may consume the value of the payload word passed to optixTrace by the caller. -/// "trace_caller_read": The caller to optixTrace may read the payload word after the call to optixTrace. -/// -/// Semantics can be bitwise combined. -/// Combining "read" and "write" is equivalent to specifying "read_write". -/// A payload needs to be writable by the caller or at least one shader type. -/// A payload needs to be readable by the caller or at least one shader type after a being writable. -typedef enum OptixPayloadSemantics -{ - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE = 0, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ = 1u << 0, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE = 2u << 0, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE = 3u << 0, - - OPTIX_PAYLOAD_SEMANTICS_CH_NONE = 0, - OPTIX_PAYLOAD_SEMANTICS_CH_READ = 1u << 2, - OPTIX_PAYLOAD_SEMANTICS_CH_WRITE = 2u << 2, - OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE = 3u << 2, - - OPTIX_PAYLOAD_SEMANTICS_MS_NONE = 0, - OPTIX_PAYLOAD_SEMANTICS_MS_READ = 1u << 4, - OPTIX_PAYLOAD_SEMANTICS_MS_WRITE = 2u << 4, - OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE = 3u << 4, - - OPTIX_PAYLOAD_SEMANTICS_AH_NONE = 0, - OPTIX_PAYLOAD_SEMANTICS_AH_READ = 1u << 6, - OPTIX_PAYLOAD_SEMANTICS_AH_WRITE = 2u << 6, - OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE = 3u << 6, - - OPTIX_PAYLOAD_SEMANTICS_IS_NONE = 0, - OPTIX_PAYLOAD_SEMANTICS_IS_READ = 1u << 8, - OPTIX_PAYLOAD_SEMANTICS_IS_WRITE = 2u << 8, - OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE = 3u << 8, -} OptixPayloadSemantics; - -/// Specifies a single payload type -typedef struct OptixPayloadType -{ - /// The number of 32b words the payload of this type holds - unsigned int numPayloadValues; - - /// Points to host array of payload word semantics, size must match numPayloadValues - const unsigned int *payloadSemantics; -} OptixPayloadType; - -/// Compilation options for module -/// -/// \see #optixModuleCreateFromPTX() -typedef struct OptixModuleCompileOptions -{ - /// 
Maximum number of registers allowed when compiling to SASS. - /// Set to 0 for no explicit limit. May vary within a pipeline. - int maxRegisterCount; - - /// Optimization level. May vary within a pipeline. - OptixCompileOptimizationLevel optLevel; - - /// Generate debug information. - OptixCompileDebugLevel debugLevel; - - /// Ingored if numBoundValues is set to 0 - const OptixModuleCompileBoundValueEntry* boundValues; - - /// set to 0 if unused - unsigned int numBoundValues; - - /// The number of different payload types available for compilation. - /// Must be zero if OptixPipelineCompileOptions::numPayloadValues is not zero. - unsigned int numPayloadTypes; - - /// Points to host array of payload type definitions, size must match numPayloadTypes - OptixPayloadType *payloadTypes; - -} OptixModuleCompileOptions; - -/// Distinguishes different kinds of program groups. -typedef enum OptixProgramGroupKind -{ - /// Program group containing a raygen (RG) program - /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::raygen - OPTIX_PROGRAM_GROUP_KIND_RAYGEN = 0x2421, - - /// Program group containing a miss (MS) program - /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::miss - OPTIX_PROGRAM_GROUP_KIND_MISS = 0x2422, - - /// Program group containing an exception (EX) program - /// \see OptixProgramGroupHitgroup, #OptixProgramGroupDesc::exception - OPTIX_PROGRAM_GROUP_KIND_EXCEPTION = 0x2423, - - /// Program group containing an intersection (IS), any hit (AH), and/or closest hit (CH) program - /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::hitgroup - OPTIX_PROGRAM_GROUP_KIND_HITGROUP = 0x2424, - - /// Program group containing a direct (DC) or continuation (CC) callable program - /// \see OptixProgramGroupCallables, #OptixProgramGroupDesc::callables - OPTIX_PROGRAM_GROUP_KIND_CALLABLES = 0x2425 -} OptixProgramGroupKind; - -/// Flags for program groups -typedef enum OptixProgramGroupFlags -{ - /// Currently there are no flags - 
OPTIX_PROGRAM_GROUP_FLAGS_NONE = 0 -} OptixProgramGroupFlags; - -/// Program group representing a single module. -/// -/// Used for raygen, miss, and exception programs. In case of raygen and exception programs, module and entry -/// function name need to be valid. For miss programs, module and entry function name might both be \c nullptr. -/// -/// \see #OptixProgramGroupDesc::raygen, #OptixProgramGroupDesc::miss, #OptixProgramGroupDesc::exception -typedef struct OptixProgramGroupSingleModule -{ - /// Module holding single program. - OptixModule module; - /// Entry function name of the single program. - const char* entryFunctionName; -} OptixProgramGroupSingleModule; - -/// Program group representing the hitgroup. -/// -/// For each of the three program types, module and entry function name might both be \c nullptr. -/// -/// \see #OptixProgramGroupDesc::hitgroup -typedef struct OptixProgramGroupHitgroup -{ - /// Module holding the closest hit (CH) program. - OptixModule moduleCH; - /// Entry function name of the closest hit (CH) program. - const char* entryFunctionNameCH; - /// Module holding the any hit (AH) program. - OptixModule moduleAH; - /// Entry function name of the any hit (AH) program. - const char* entryFunctionNameAH; - /// Module holding the intersection (Is) program. - OptixModule moduleIS; - /// Entry function name of the intersection (IS) program. - const char* entryFunctionNameIS; -} OptixProgramGroupHitgroup; - -/// Program group representing callables. -/// -/// Module and entry function name need to be valid for at least one of the two callables. -/// -/// \see ##OptixProgramGroupDesc::callables -typedef struct OptixProgramGroupCallables -{ - /// Module holding the direct callable (DC) program. - OptixModule moduleDC; - /// Entry function name of the direct callable (DC) program. - const char* entryFunctionNameDC; - /// Module holding the continuation callable (CC) program. 
- OptixModule moduleCC; - /// Entry function name of the continuation callable (CC) program. - const char* entryFunctionNameCC; -} OptixProgramGroupCallables; - -/// Descriptor for program groups. -typedef struct OptixProgramGroupDesc -{ - /// The kind of program group. - OptixProgramGroupKind kind; - - /// See #OptixProgramGroupFlags - unsigned int flags; - - union - { - /// \see #OPTIX_PROGRAM_GROUP_KIND_RAYGEN - OptixProgramGroupSingleModule raygen; - /// \see #OPTIX_PROGRAM_GROUP_KIND_MISS - OptixProgramGroupSingleModule miss; - /// \see #OPTIX_PROGRAM_GROUP_KIND_EXCEPTION - OptixProgramGroupSingleModule exception; - /// \see #OPTIX_PROGRAM_GROUP_KIND_CALLABLES - OptixProgramGroupCallables callables; - /// \see #OPTIX_PROGRAM_GROUP_KIND_HITGROUP - OptixProgramGroupHitgroup hitgroup; - }; -} OptixProgramGroupDesc; - -/// Program group options -/// -/// \see #optixProgramGroupCreate() -typedef struct OptixProgramGroupOptions -{ - /// Specifies the payload type of this program group. - /// All programs in the group must support the payload type - /// (Program support for a type is specified by calling - /// \see #optixSetPayloadTypes or otherwise all types specified in - /// \see #OptixModuleCompileOptions are supported). - /// If a program is not available for the requested payload type, - /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH. - /// If the payloadType is left zero, a unique type is deduced. - /// The payload type can be uniquely deduced if there is exactly one payload type - /// for which all programs in the group are available. - /// If the payload type could not be deduced uniquely - /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED. - OptixPayloadType* payloadType; -} OptixProgramGroupOptions; - -/// The following values are used to indicate which exception was thrown. -typedef enum OptixExceptionCodes -{ - /// Stack overflow of the continuation stack. - /// no exception details. 
- OPTIX_EXCEPTION_CODE_STACK_OVERFLOW = -1, - - /// The trace depth is exceeded. - /// no exception details. - OPTIX_EXCEPTION_CODE_TRACE_DEPTH_EXCEEDED = -2, - - /// The traversal depth is exceeded. - /// Exception details: - /// optixGetTransformListSize() - /// optixGetTransformListHandle() - OPTIX_EXCEPTION_CODE_TRAVERSAL_DEPTH_EXCEEDED = -3, - - /// Traversal encountered an invalid traversable type. - /// Exception details: - /// optixGetTransformListSize() - /// optixGetTransformListHandle() - /// optixGetExceptionInvalidTraversable() - OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE = -5, - - /// The miss SBT record index is out of bounds - /// A miss SBT record index is valid within the range [0, OptixShaderBindingTable::missRecordCount) (See optixLaunch) - /// Exception details: - /// optixGetExceptionInvalidSbtOffset() - OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT = -6, - - /// The traversal hit SBT record index out of bounds. - /// - /// A traversal hit SBT record index is valid within the range [0, OptixShaderBindingTable::hitgroupRecordCount) (See optixLaunch) - /// The following formula relates the - // sbt-index (See optixGetExceptionInvalidSbtOffset), - // sbt-instance-offset (See OptixInstance::sbtOffset), - /// sbt-geometry-acceleration-structure-index (See optixGetSbtGASIndex), - /// sbt-stride-from-trace-call and sbt-offset-from-trace-call (See optixTrace) - /// - /// sbt-index = sbt-instance-offset + (sbt-geometry-acceleration-structure-index * sbt-stride-from-trace-call) + sbt-offset-from-trace-call - /// - /// Exception details: - /// optixGetTransformListSize() - /// optixGetTransformListHandle() - /// optixGetExceptionInvalidSbtOffset() - /// optixGetSbtGASIndex() - OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT = -7, - - /// The shader encountered an unsupported primitive type (See OptixPipelineCompileOptions::usesPrimitiveTypeFlags). - /// no exception details. 
- OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE = -8, - - /// The shader encountered a call to optixTrace with at least - /// one of the float arguments being inf or nan, or the tmin argument is negative. - /// Exception details: - /// optixGetExceptionInvalidRay() - OPTIX_EXCEPTION_CODE_INVALID_RAY = -9, - - /// The shader encountered a call to either optixDirectCall or optixCallableCall - /// where the argument count does not match the parameter count of the callable - /// program which is called. - /// Exception details: - /// optixGetExceptionParameterMismatch - OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH = -10, - - /// The invoked builtin IS does not match the current GAS - OPTIX_EXCEPTION_CODE_BUILTIN_IS_MISMATCH = -11, - - /// Tried to call a callable program using an SBT offset that is larger - /// than the number of passed in callable SBT records. - /// Exception details: - /// optixGetExceptionInvalidSbtOffset() - OPTIX_EXCEPTION_CODE_CALLABLE_INVALID_SBT = -12, - - /// Tried to call a direct callable using an SBT offset of a record that - /// was built from a program group that did not include a direct callable. - OPTIX_EXCEPTION_CODE_CALLABLE_NO_DC_SBT_RECORD = -13, - - /// Tried to call a continuation callable using an SBT offset of a record - /// that was built from a program group that did not include a continuation callable. - OPTIX_EXCEPTION_CODE_CALLABLE_NO_CC_SBT_RECORD = -14, - - /// Tried to directly traverse a single gas while single gas traversable graphs are not enabled - /// (see OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS). - /// Exception details: - /// optixGetTransformListSize() - /// optixGetTransformListHandle() - /// optixGetExceptionInvalidTraversable() - OPTIX_EXCEPTION_CODE_UNSUPPORTED_SINGLE_LEVEL_GAS = -15, - - /// argument passed to an optix call is - /// not within an acceptable range of values. 
- OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0 = -16, - OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_1 = -17, - OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_2 = -18, - - /// Tried to access data on an AS without random data access support (See OptixBuildFlags). - OPTIX_EXCEPTION_CODE_UNSUPPORTED_DATA_ACCESS = -32, - - /// The program payload type doesn't match the trace payload type. - OPTIX_EXCEPTION_CODE_PAYLOAD_TYPE_MISMATCH = -33, -} OptixExceptionCodes; - -/// Exception flags. -/// -/// \see #OptixPipelineCompileOptions::exceptionFlags, #OptixExceptionCodes -typedef enum OptixExceptionFlags -{ - /// No exception are enabled. - OPTIX_EXCEPTION_FLAG_NONE = 0, - - /// Enables exceptions check related to the continuation stack. - OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW = 1u << 0, - - /// Enables exceptions check related to trace depth. - OPTIX_EXCEPTION_FLAG_TRACE_DEPTH = 1u << 1, - - /// Enables user exceptions via optixThrowException(). This flag must be specified for all modules in a pipeline - /// if any module calls optixThrowException(). - OPTIX_EXCEPTION_FLAG_USER = 1u << 2, - - /// Enables various exceptions check related to traversal. - OPTIX_EXCEPTION_FLAG_DEBUG = 1u << 3 -} OptixExceptionFlags; - -/// Compilation options for all modules of a pipeline. -/// -/// Similar to #OptixModuleCompileOptions, but these options here need to be equal for all modules of a pipeline. -/// -/// \see #optixModuleCreateFromPTX(), #optixPipelineCreate() -typedef struct OptixPipelineCompileOptions -{ - /// Boolean value indicating whether motion blur could be used - int usesMotionBlur; - - /// Traversable graph bitfield. See OptixTraversableGraphFlags - unsigned int traversableGraphFlags; - - /// How much storage, in 32b words, to make available for the payload, [0..32] - /// Must be zero if numPayloadTypes is not zero. - int numPayloadValues; - - /// How much storage, in 32b words, to make available for the attributes. The - /// minimum number is 2. 
Values below that will automatically be changed to 2. [2..8] - int numAttributeValues; - - /// A bitmask of OptixExceptionFlags indicating which exceptions are enabled. - unsigned int exceptionFlags; - - /// The name of the pipeline parameter variable. If 0, no pipeline parameter - /// will be available. This will be ignored if the launch param variable was - /// optimized out or was not found in the modules linked to the pipeline. - const char* pipelineLaunchParamsVariableName; - - /// Bit field enabling primitive types. See OptixPrimitiveTypeFlags. - /// Setting to zero corresponds to enabling OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM and OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE. - unsigned int usesPrimitiveTypeFlags; - - /// Boolean value indicating whether opacity micromaps could be used - int allowOpacityMicromaps; -} OptixPipelineCompileOptions; - -/// Link options for a pipeline -/// -/// \see #optixPipelineCreate() -typedef struct OptixPipelineLinkOptions -{ - /// Maximum trace recursion depth. 0 means a ray generation program can be - /// launched, but can't trace any rays. The maximum allowed value is 31. - unsigned int maxTraceDepth; - - /// Generate debug information. - OptixCompileDebugLevel debugLevel; -} OptixPipelineLinkOptions; - -/// Describes the shader binding table (SBT) -/// -/// \see #optixLaunch() -typedef struct OptixShaderBindingTable -{ - /// Device address of the SBT record of the ray gen program to start launch at. The address must be a multiple of - /// OPTIX_SBT_RECORD_ALIGNMENT. - CUdeviceptr raygenRecord; - - /// Device address of the SBT record of the exception program. The address must be a multiple of - /// OPTIX_SBT_RECORD_ALIGNMENT. - CUdeviceptr exceptionRecord; - - /// Arrays of SBT records for miss programs. The base address and the stride must be a multiple of - /// OPTIX_SBT_RECORD_ALIGNMENT. 
- /// @{ - CUdeviceptr missRecordBase; - unsigned int missRecordStrideInBytes; - unsigned int missRecordCount; - /// @} - - /// Arrays of SBT records for hit groups. The base address and the stride must be a multiple of - /// OPTIX_SBT_RECORD_ALIGNMENT. - /// @{ - CUdeviceptr hitgroupRecordBase; - unsigned int hitgroupRecordStrideInBytes; - unsigned int hitgroupRecordCount; - /// @} - - /// Arrays of SBT records for callable programs. If the base address is not null, the stride and count must not be - /// zero. If the base address is null, then the count needs to zero. The base address and the stride must be a - /// multiple of OPTIX_SBT_RECORD_ALIGNMENT. - /// @{ - CUdeviceptr callablesRecordBase; - unsigned int callablesRecordStrideInBytes; - unsigned int callablesRecordCount; - /// @} - -} OptixShaderBindingTable; - -/// Describes the stack size requirements of a program group. -/// -/// \see optixProgramGroupGetStackSize() -typedef struct OptixStackSizes -{ - /// Continuation stack size of RG programs in bytes - unsigned int cssRG; - /// Continuation stack size of MS programs in bytes - unsigned int cssMS; - /// Continuation stack size of CH programs in bytes - unsigned int cssCH; - /// Continuation stack size of AH programs in bytes - unsigned int cssAH; - /// Continuation stack size of IS programs in bytes - unsigned int cssIS; - /// Continuation stack size of CC programs in bytes - unsigned int cssCC; - /// Direct stack size of DC programs in bytes - unsigned int dssDC; - -} OptixStackSizes; - -/// Options that can be passed to \c optixQueryFunctionTable() -typedef enum OptixQueryFunctionTableOptions -{ - /// Placeholder (there are no options yet) - OPTIX_QUERY_FUNCTION_TABLE_OPTION_DUMMY = 0 - -} OptixQueryFunctionTableOptions; - -/// Type of the function \c optixQueryFunctionTable() -typedef OptixResult( OptixQueryFunctionTable_t )( int abiId, - unsigned int numOptions, - OptixQueryFunctionTableOptions* /*optionKeys*/, - const void** /*optionValues*/, - 
void* functionTable, - size_t sizeOfTable ); - -/// Specifies the options for retrieving an intersection program for a built-in primitive type. -/// The primitive type must not be OPTIX_PRIMITIVE_TYPE_CUSTOM. -/// -/// \see #optixBuiltinISModuleGet() -typedef struct OptixBuiltinISOptions -{ - OptixPrimitiveType builtinISModuleType; - /// Boolean value indicating whether vertex motion blur is used (but not motion transform blur). - int usesMotionBlur; - /// Build flags, see OptixBuildFlags. - unsigned int buildFlags; - /// End cap properties of curves, see OptixCurveEndcapFlags, 0 for non-curve types. - unsigned int curveEndcapFlags; -} OptixBuiltinISOptions; - -#if defined( __CUDACC__ ) -/// Describes the ray that was passed into \c optixTrace() which caused an exception with -/// exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. -/// -/// \see #optixGetExceptionInvalidRay() -typedef struct OptixInvalidRayExceptionDetails -{ - float3 origin; - float3 direction; - float tmin; - float tmax; - float time; -} OptixInvalidRayExceptionDetails; - -/// Describes the details of a call to a callable program which caused an exception with -/// exception code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH, -/// Note that OptiX packs the parameters into individual 32 bit values, so the number of -/// expected and passed values may not correspond to the number of arguments passed into -/// optixDirectCall or optixContinuationCall, or the number parameters in the definition -/// of the function that is called. 
-typedef struct OptixParameterMismatchExceptionDetails -{ - /// Number of 32 bit values expected by the callable program - unsigned int expectedParameterCount; - /// Number of 32 bit values that were passed to the callable program - unsigned int passedArgumentCount; - /// The offset of the SBT entry of the callable program relative to OptixShaderBindingTable::callablesRecordBase - unsigned int sbtIndex; - /// Pointer to a string that holds the name of the callable program that was called - char* callableName; -} OptixParameterMismatchExceptionDetails; -#endif - - -/*@}*/ // end group optix_types - -#endif // __optix_optix_7_types_h__ diff --git a/zenovis/xinxinoptix/include/optix_denoiser_tiling.h b/zenovis/xinxinoptix/include/optix_denoiser_tiling.h index 03dda26138..c285154a43 100644 --- a/zenovis/xinxinoptix/include/optix_denoiser_tiling.h +++ b/zenovis/xinxinoptix/include/optix_denoiser_tiling.h @@ -30,8 +30,8 @@ /// @author NVIDIA Corporation /// @brief OptiX public API header -#ifndef optix_denoiser_tiling_h -#define optix_denoiser_tiling_h +#ifndef OPTIX_DENOISER_TILING_H +#define OPTIX_DENOISER_TILING_H #include <optix.h> @@ -76,6 +76,9 @@ inline OptixResult optixUtilGetPixelStride( const OptixImage2D& image, unsigned { switch( image.format ) { + case OPTIX_PIXEL_FORMAT_HALF1: + pixelStrideInBytes = 1 * sizeof( short ); + break; case OPTIX_PIXEL_FORMAT_HALF2: pixelStrideInBytes = 2 * sizeof( short ); break; @@ -85,6 +88,9 @@ inline OptixResult optixUtilGetPixelStride( const OptixImage2D& image, unsigned case OPTIX_PIXEL_FORMAT_HALF4: pixelStrideInBytes = 4 * sizeof( short ); break; + case OPTIX_PIXEL_FORMAT_FLOAT1: + pixelStrideInBytes = 1 * sizeof( float ); + break; case OPTIX_PIXEL_FORMAT_FLOAT2: pixelStrideInBytes = 2 * sizeof( float ); break; @@ -271,6 +277,7 @@ inline OptixResult optixUtilDenoiserInvokeTiled( tileWidth, tileHeight, normalTiles ) ) return res; } + std::vector<OptixUtilDenoiserImageTile> flowTiles; if( guideLayer->flow.data ) { @@ -281,6 +288,16 @@ inline OptixResult
optixUtilDenoiserInvokeTiled( return res; } + std::vector<OptixUtilDenoiserImageTile> flowTrustTiles; + if( guideLayer->flowTrustworthiness.data ) + { + OptixImage2D dummyOutput = guideLayer->flowTrustworthiness; + if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->flowTrustworthiness, dummyOutput, + overlapWindowSizeInPixels, + tileWidth, tileHeight, flowTrustTiles ) ) + return res; + } + std::vector<OptixUtilDenoiserImageTile> internalGuideLayerTiles; if( guideLayer->previousOutputInternalGuideLayer.data && guideLayer->outputInternalGuideLayer.data ) { @@ -301,6 +318,7 @@ inline OptixResult optixUtilDenoiserInvokeTiled( layer.output = ( tiles[l] )[t].output; if( layers[l].previousOutput.data ) layer.previousOutput = ( prevTiles[l] )[t].input; + layer.type = layers[l].type; tlayers.push_back( layer ); } @@ -314,6 +332,9 @@ inline OptixResult optixUtilDenoiserInvokeTiled( if( guideLayer->flow.data ) gl.flow = flowTiles[t].input; + if( guideLayer->flowTrustworthiness.data ) + gl.flowTrustworthiness = flowTrustTiles[t].input; + if( guideLayer->previousOutputInternalGuideLayer.data ) gl.previousOutputInternalGuideLayer = internalGuideLayerTiles[t].input; @@ -330,10 +351,10 @@ inline OptixResult optixUtilDenoiserInvokeTiled( return OPTIX_SUCCESS; } -/*@}*/ // end group optix_utilities +/**@}*/ // end group optix_utilities #ifdef __cplusplus } #endif -#endif // __optix_optix_stack_size_h__ +#endif // OPTIX_DENOISER_TILING_H diff --git a/zenovis/xinxinoptix/include/optix_device.h b/zenovis/xinxinoptix/include/optix_device.h index 6dcb280e23..179b887bfc 100644 --- a/zenovis/xinxinoptix/include/optix_device.h +++ b/zenovis/xinxinoptix/include/optix_device.h @@ -1,47 +1,1047 @@ - /* - * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. - * - * NVIDIA Corporation and its licensors retain all intellectual property and proprietary - * rights in and to this software, related documentation and any modifications thereto.
- * Any use, reproduction, disclosure or distribution of this software and related - * documentation without an express license agreement from NVIDIA Corporation is strictly - * prohibited. - * - * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* - * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, - * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY - * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT - * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF - * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR - * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGES - */ - - /** - * @file optix_device.h - * @author NVIDIA Corporation - * @brief OptiX public API - * - * OptiX public API Reference - Host/Device side - */ - -/******************************************************************************\ - * optix_cuda.h - * - * This file provides the nvcc interface for generating PTX that the OptiX is - * capable of parsing and weaving into the final kernel. This is included by - * optix.h automatically if compiling device code. It can be included explicitly - * in host code if desired. - * -\******************************************************************************/ -#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) -# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ -# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ -#endif -#include "optix_7_device.h" -#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ ) -# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ -# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
+* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. +* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX public API Reference - Device API declarations + +#ifndef OPTIX_OPTIX_DEVICE_H +#define OPTIX_OPTIX_DEVICE_H + +#if defined( __cplusplus ) && ( __cplusplus < 201103L ) && !defined( _WIN32 ) +#error Device code for OptiX requires at least C++11. Consider adding "--std c++11" to the nvcc command-line. #endif + +#include "optix_types.h" + +/// \defgroup optix_device_api Device API +/// \brief OptiX Device API + +/** \addtogroup optix_device_api +@{ +*/ + + +/// Initiates a ray tracing query starting with the given traversable. 
+/// +/// \param[in] handle +/// \param[in] rayOrigin +/// \param[in] rayDirection +/// \param[in] tmin +/// \param[in] tmax +/// \param[in] rayTime +/// \param[in] visibilityMask really only 8 bits +/// \param[in] rayFlags really only 16 bits, combination of OptixRayFlags +/// \param[in] SBToffset really only 4 bits +/// \param[in] SBTstride really only 4 bits +/// \param[in] missSBTIndex specifies the miss program invoked on a miss +/// \param[in,out] payload up to 32 unsigned int values that hold the payload +template <typename... Payload> +static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle, + float3 rayOrigin, + float3 rayDirection, + float tmin, + float tmax, + float rayTime, + OptixVisibilityMask visibilityMask, + unsigned int rayFlags, + unsigned int SBToffset, + unsigned int SBTstride, + unsigned int missSBTIndex, + Payload&... payload ); + + +/// Initiates a ray tracing query starting with the given traversable. +/// +/// \param[in] type +/// \param[in] handle +/// \param[in] rayOrigin +/// \param[in] rayDirection +/// \param[in] tmin +/// \param[in] tmax +/// \param[in] rayTime +/// \param[in] visibilityMask really only 8 bits +/// \param[in] rayFlags really only 16 bits, combination of OptixRayFlags +/// \param[in] SBToffset really only 4 bits +/// \param[in] SBTstride really only 4 bits +/// \param[in] missSBTIndex specifies the miss program invoked on a miss +/// \param[in,out] payload up to 32 unsigned int values that hold the payload +template <typename... Payload> +static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID type, + OptixTraversableHandle handle, + float3 rayOrigin, + float3 rayDirection, + float tmin, + float tmax, + float rayTime, + OptixVisibilityMask visibilityMask, + unsigned int rayFlags, + unsigned int SBToffset, + unsigned int SBTstride, + unsigned int missSBTIndex, + Payload&... payload ); + + +/// Writes the 32-bit payload value at slot 0.
+static __forceinline__ __device__ void optixSetPayload_0( unsigned int p ); +/// Writes the 32-bit payload value at slot 1. +static __forceinline__ __device__ void optixSetPayload_1( unsigned int p ); +/// Writes the 32-bit payload value at slot 2. +static __forceinline__ __device__ void optixSetPayload_2( unsigned int p ); +/// Writes the 32-bit payload value at slot 3. +static __forceinline__ __device__ void optixSetPayload_3( unsigned int p ); +/// Writes the 32-bit payload value at slot 4. +static __forceinline__ __device__ void optixSetPayload_4( unsigned int p ); +/// Writes the 32-bit payload value at slot 5. +static __forceinline__ __device__ void optixSetPayload_5( unsigned int p ); +/// Writes the 32-bit payload value at slot 6. +static __forceinline__ __device__ void optixSetPayload_6( unsigned int p ); +/// Writes the 32-bit payload value at slot 7. +static __forceinline__ __device__ void optixSetPayload_7( unsigned int p ); + +/// Writes the 32-bit payload value at slot 8. +static __forceinline__ __device__ void optixSetPayload_8( unsigned int p ); +/// Writes the 32-bit payload value at slot 9. +static __forceinline__ __device__ void optixSetPayload_9( unsigned int p ); +/// Writes the 32-bit payload value at slot 10. +static __forceinline__ __device__ void optixSetPayload_10( unsigned int p ); +/// Writes the 32-bit payload value at slot 11. +static __forceinline__ __device__ void optixSetPayload_11( unsigned int p ); +/// Writes the 32-bit payload value at slot 12. +static __forceinline__ __device__ void optixSetPayload_12( unsigned int p ); +/// Writes the 32-bit payload value at slot 13. +static __forceinline__ __device__ void optixSetPayload_13( unsigned int p ); +/// Writes the 32-bit payload value at slot 14. +static __forceinline__ __device__ void optixSetPayload_14( unsigned int p ); +/// Writes the 32-bit payload value at slot 15. 
+static __forceinline__ __device__ void optixSetPayload_15( unsigned int p ); +/// Writes the 32-bit payload value at slot 16. +static __forceinline__ __device__ void optixSetPayload_16( unsigned int p ); +/// Writes the 32-bit payload value at slot 17. +static __forceinline__ __device__ void optixSetPayload_17( unsigned int p ); +/// Writes the 32-bit payload value at slot 18. +static __forceinline__ __device__ void optixSetPayload_18( unsigned int p ); +/// Writes the 32-bit payload value at slot 19. +static __forceinline__ __device__ void optixSetPayload_19( unsigned int p ); +/// Writes the 32-bit payload value at slot 20. +static __forceinline__ __device__ void optixSetPayload_20( unsigned int p ); +/// Writes the 32-bit payload value at slot 21. +static __forceinline__ __device__ void optixSetPayload_21( unsigned int p ); +/// Writes the 32-bit payload value at slot 22. +static __forceinline__ __device__ void optixSetPayload_22( unsigned int p ); +/// Writes the 32-bit payload value at slot 23. +static __forceinline__ __device__ void optixSetPayload_23( unsigned int p ); +/// Writes the 32-bit payload value at slot 24. +static __forceinline__ __device__ void optixSetPayload_24( unsigned int p ); +/// Writes the 32-bit payload value at slot 25. +static __forceinline__ __device__ void optixSetPayload_25( unsigned int p ); +/// Writes the 32-bit payload value at slot 26. +static __forceinline__ __device__ void optixSetPayload_26( unsigned int p ); +/// Writes the 32-bit payload value at slot 27. +static __forceinline__ __device__ void optixSetPayload_27( unsigned int p ); +/// Writes the 32-bit payload value at slot 28. +static __forceinline__ __device__ void optixSetPayload_28( unsigned int p ); +/// Writes the 32-bit payload value at slot 29. +static __forceinline__ __device__ void optixSetPayload_29( unsigned int p ); +/// Writes the 32-bit payload value at slot 30. 
+static __forceinline__ __device__ void optixSetPayload_30( unsigned int p ); +/// Writes the 32-bit payload value at slot 31. +static __forceinline__ __device__ void optixSetPayload_31( unsigned int p ); + +/// Reads the 32-bit payload value at slot 0. +static __forceinline__ __device__ unsigned int optixGetPayload_0(); +/// Reads the 32-bit payload value at slot 1. +static __forceinline__ __device__ unsigned int optixGetPayload_1(); +/// Reads the 32-bit payload value at slot 2. +static __forceinline__ __device__ unsigned int optixGetPayload_2(); +/// Reads the 32-bit payload value at slot 3. +static __forceinline__ __device__ unsigned int optixGetPayload_3(); +/// Reads the 32-bit payload value at slot 4. +static __forceinline__ __device__ unsigned int optixGetPayload_4(); +/// Reads the 32-bit payload value at slot 5. +static __forceinline__ __device__ unsigned int optixGetPayload_5(); +/// Reads the 32-bit payload value at slot 6. +static __forceinline__ __device__ unsigned int optixGetPayload_6(); +/// Reads the 32-bit payload value at slot 7. +static __forceinline__ __device__ unsigned int optixGetPayload_7(); + +/// Reads the 32-bit payload value at slot 8. +static __forceinline__ __device__ unsigned int optixGetPayload_8(); +/// Reads the 32-bit payload value at slot 9. +static __forceinline__ __device__ unsigned int optixGetPayload_9(); +/// Reads the 32-bit payload value at slot 10. +static __forceinline__ __device__ unsigned int optixGetPayload_10(); +/// Reads the 32-bit payload value at slot 11. +static __forceinline__ __device__ unsigned int optixGetPayload_11(); +/// Reads the 32-bit payload value at slot 12. +static __forceinline__ __device__ unsigned int optixGetPayload_12(); +/// Reads the 32-bit payload value at slot 13. +static __forceinline__ __device__ unsigned int optixGetPayload_13(); +/// Reads the 32-bit payload value at slot 14. 
+static __forceinline__ __device__ unsigned int optixGetPayload_14(); +/// Reads the 32-bit payload value at slot 15. +static __forceinline__ __device__ unsigned int optixGetPayload_15(); +/// Reads the 32-bit payload value at slot 16. +static __forceinline__ __device__ unsigned int optixGetPayload_16(); +/// Reads the 32-bit payload value at slot 17. +static __forceinline__ __device__ unsigned int optixGetPayload_17(); +/// Reads the 32-bit payload value at slot 18. +static __forceinline__ __device__ unsigned int optixGetPayload_18(); +/// Reads the 32-bit payload value at slot 19. +static __forceinline__ __device__ unsigned int optixGetPayload_19(); +/// Reads the 32-bit payload value at slot 20. +static __forceinline__ __device__ unsigned int optixGetPayload_20(); +/// Reads the 32-bit payload value at slot 21. +static __forceinline__ __device__ unsigned int optixGetPayload_21(); +/// Reads the 32-bit payload value at slot 22. +static __forceinline__ __device__ unsigned int optixGetPayload_22(); +/// Reads the 32-bit payload value at slot 23. +static __forceinline__ __device__ unsigned int optixGetPayload_23(); +/// Reads the 32-bit payload value at slot 24. +static __forceinline__ __device__ unsigned int optixGetPayload_24(); +/// Reads the 32-bit payload value at slot 25. +static __forceinline__ __device__ unsigned int optixGetPayload_25(); +/// Reads the 32-bit payload value at slot 26. +static __forceinline__ __device__ unsigned int optixGetPayload_26(); +/// Reads the 32-bit payload value at slot 27. +static __forceinline__ __device__ unsigned int optixGetPayload_27(); +/// Reads the 32-bit payload value at slot 28. +static __forceinline__ __device__ unsigned int optixGetPayload_28(); +/// Reads the 32-bit payload value at slot 29. +static __forceinline__ __device__ unsigned int optixGetPayload_29(); +/// Reads the 32-bit payload value at slot 30. 
+static __forceinline__ __device__ unsigned int optixGetPayload_30(); +/// Reads the 32-bit payload value at slot 31. +static __forceinline__ __device__ unsigned int optixGetPayload_31(); + +/// Specify the supported payload types for a program. +/// +/// The supported types are specified as a bitwise combination of payload types. (See OptixPayloadTypeID) +/// May only be called once per program. +/// Must be called at the top of the program. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ void optixSetPayloadTypes( unsigned int typeMask ); + +/// Returns an undefined value. +static __forceinline__ __device__ unsigned int optixUndefinedValue(); + +/// Returns the rayOrigin passed into optixTrace. +/// +/// May be more expensive to call in IS and AH than their object space counterparts, +/// so effort should be made to use the object space ray in those programs. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float3 optixGetWorldRayOrigin(); + +/// Returns the rayDirection passed into optixTrace. +/// +/// May be more expensive to call in IS and AH than their object space counterparts, +/// so effort should be made to use the object space ray in those programs. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float3 optixGetWorldRayDirection(); + +/// Returns the current object space ray origin based on the current transform stack. +/// +/// Only available in IS and AH. +static __forceinline__ __device__ float3 optixGetObjectRayOrigin(); + +/// Returns the current object space ray direction based on the current transform stack. +/// +/// Only available in IS and AH. +static __forceinline__ __device__ float3 optixGetObjectRayDirection(); + +/// Returns the tmin passed into optixTrace. 
+/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTmin(); + +/// In IS and CH returns the current smallest reported hitT or the tmax passed into optixTrace if no hit has been reported +/// In AH returns the hitT value as passed in to optixReportIntersection +/// In MS returns the tmax passed into optixTrace +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTmax(); + +/// Returns the rayTime passed into optixTrace. +/// +/// Will return 0 if motion is disabled. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTime(); + +/// Returns the rayFlags passed into optixTrace +/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ unsigned int optixGetRayFlags(); + +/// Returns the visibilityMask passed into optixTrace +/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ unsigned int optixGetRayVisibilityMask(); + +/// Return the traversable handle of a given instance in an Instance +/// Acceleration Structure (IAS) +/// To obtain instance traversables by index, the IAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS. +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceTraversableFromIAS( OptixTraversableHandle ias, unsigned int instIdx ); + +/// Return the object space triangle vertex positions of a given triangle in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. 
+static __forceinline__ __device__ void optixGetTriangleVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float3 data[3]); + +/// Return the object space micro triangle vertex positions of the current hit. +/// The current hit must be a displacement micromap triangle hit. +static __forceinline__ __device__ void optixGetMicroTriangleVertexData( float3 data[3] ); +/// Returns the barycentrics of the vertices of the currently intersected micro triangle with respect to the base triangle. +static __forceinline__ __device__ void optixGetMicroTriangleBarycentricsData( float2 data[3] ); + +/// Return the object space curve control vertex data of a linear curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetLinearCurveVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[2] ); + +/// Return the object space curve control vertex data of a quadratic BSpline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. 
+static __forceinline__ __device__ void optixGetQuadraticBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[3] ); + +/// Return the object space curve control vertex data of a cubic BSpline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetCubicBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); + +/// Return the object space curve control vertex data of a CatmullRom spline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetCatmullRomVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); + +/// Return the object space curve control vertex data of a cubic Bezier curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. 
+/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetCubicBezierVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); + +/// Return the object space curve control vertex data of a ribbon (flat quadratic BSpline) in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetRibbonVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[3] ); + +/// Return ribbon normal at intersection reported by optixReportIntersection. +static __forceinline__ __device__ float3 optixGetRibbonNormal( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float2 ribbonParameters ); + +/// Return the object space sphere data, center point and radius, in a Geometry Acceleration Structure (GAS) at a given motion time. +/// To access sphere data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[0] = {x,y,z,w} with {x,y,z} the position of the sphere center and w the radius. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. 
+static __forceinline__ __device__ void optixGetSphereData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[1] ); + +/// Returns the traversable handle for the Geometry Acceleration Structure (GAS) containing +/// the current hit. May be called from IS, AH and CH. +static __forceinline__ __device__ OptixTraversableHandle optixGetGASTraversableHandle(); + +/// Returns the motion begin time of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ float optixGetGASMotionTimeBegin( OptixTraversableHandle gas ); + +/// Returns the motion end time of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ float optixGetGASMotionTimeEnd( OptixTraversableHandle gas ); + +/// Returns the number of motion steps of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ unsigned int optixGetGASMotionStepCount( OptixTraversableHandle gas ); + +/// Returns the world-to-object transformation matrix resulting from the current active transformation list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float m[12] ); + +/// Returns the object-to-world transformation matrix resulting from the current active transformation list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float m[12] ); + +/// Transforms the point using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformPointFromWorldToObjectSpace( float3 point ); + +/// Transforms the vector using world-to-object transformation matrix resulting from the current active transformation +/// list. 
+/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformVectorFromWorldToObjectSpace( float3 vec ); + +/// Transforms the normal using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformNormalFromWorldToObjectSpace( float3 normal ); + +/// Transforms the point using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformPointFromObjectToWorldSpace( float3 point ); + +/// Transforms the vector using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformVectorFromObjectToWorldSpace( float3 vec ); + +/// Transforms the normal using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformNormalFromObjectToWorldSpace( float3 normal ); + +/// Returns the number of transforms on the current transform list. +/// +/// Only available in IS, AH, CH, EX +static __forceinline__ __device__ unsigned int optixGetTransformListSize(); + +/// Returns the traversable handle for a transform on the current transform list. 
+/// +/// Only available in IS, AH, CH, EX +static __forceinline__ __device__ OptixTraversableHandle optixGetTransformListHandle( unsigned int index ); + + +/// Returns the transform type of a traversable handle from a transform list. +static __forceinline__ __device__ OptixTransformType optixGetTransformTypeFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to an OptixStaticTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM. +static __forceinline__ __device__ const OptixStaticTransform* optixGetStaticTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to an OptixSRTMotionTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM. +static __forceinline__ __device__ const OptixSRTMotionTransform* optixGetSRTMotionTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to an OptixMatrixMotionTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM. +static __forceinline__ __device__ const OptixMatrixMotionTransform* optixGetMatrixMotionTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns instanceId from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ unsigned int optixGetInstanceIdFromHandle( OptixTraversableHandle handle ); + +/// Returns child traversable handle from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceChildFromHandle( OptixTraversableHandle handle ); + +/// Returns object-to-world transform from an OptixInstance traversable.
+/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ const float4* optixGetInstanceTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns world-to-object transform from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ const float4* optixGetInstanceInverseTransformFromHandle( OptixTraversableHandle handle ); + +/// Reports an intersection (overload without attributes). +/// +/// If optixGetRayTmin() <= hitT <= optixGetRayTmax(), the any hit program associated with this intersection program (via the SBT entry) is called. +/// The AH program can do one of three things: +/// 1. call optixIgnoreIntersection - no hit is recorded, optixReportIntersection returns false +/// 2. call optixTerminateRay - hit is recorded, optixReportIntersection does not return, no further traversal occurs, +/// and the associated closest hit program is called +/// 3. neither - hit is recorded, optixReportIntersection returns true +/// hitKind - Only the 7 least significant bits should be written [0..127]. Any values above 127 are reserved for built in intersection. The value can be queried with optixGetHitKind() in AH and CH. +/// +/// The attributes specified with a0..a7 are available in the AH and CH programs. +/// Note that the attributes available in the CH program correspond to the closest recorded intersection. +/// The number of attributes in registers and memory can be configured in the pipeline. +/// +/// \param[in] hitT +/// \param[in] hitKind +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind ); + +/// Reports an intersection (overload with 1 attribute register).
+/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0 ); + +/// Reports an intersection (overload with 2 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1 ); + +/// Reports an intersection (overload with 3 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1, unsigned int a2 ); + +/// Reports an intersection (overload with 4 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3 ); + +/// Reports an intersection (overload with 5 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4 ); + +/// Reports an intersection (overload with 6 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5 ); + +/// Reports an intersection (overload with 7 attribute registers). 
+/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6 ); + +/// Reports an intersection (overload with 8 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6, + unsigned int a7 ); + +/// Returns the attribute at slot 0. +static __forceinline__ __device__ unsigned int optixGetAttribute_0(); +/// Returns the attribute at slot 1. +static __forceinline__ __device__ unsigned int optixGetAttribute_1(); +/// Returns the attribute at slot 2. +static __forceinline__ __device__ unsigned int optixGetAttribute_2(); +/// Returns the attribute at slot 3. +static __forceinline__ __device__ unsigned int optixGetAttribute_3(); +/// Returns the attribute at slot 4. +static __forceinline__ __device__ unsigned int optixGetAttribute_4(); +/// Returns the attribute at slot 5. +static __forceinline__ __device__ unsigned int optixGetAttribute_5(); +/// Returns the attribute at slot 6. +static __forceinline__ __device__ unsigned int optixGetAttribute_6(); +/// Returns the attribute at slot 7. +static __forceinline__ __device__ unsigned int optixGetAttribute_7(); + +/// Records the hit, stops traversal, and proceeds to CH. +/// +/// Available only in AH. +static __forceinline__ __device__ void optixTerminateRay(); + +/// Discards the hit, and returns control to the calling optixReportIntersection or built-in intersection routine. +/// +/// Available only in AH.
+static __forceinline__ __device__ void optixIgnoreIntersection(); + + +/// For a given OptixBuildInputTriangleArray the number of primitives is defined as +/// "(OptixBuildInputTriangleArray::indexBuffer == 0) ? OptixBuildInputTriangleArray::numVertices/3 : +/// OptixBuildInputTriangleArray::numIndexTriplets;". +/// For a given OptixBuildInputCustomPrimitiveArray the number of primitives is defined as +/// numAabbs. +/// +/// The primitive index returns the index into the array of primitives +/// plus the primitiveIndexOffset. +/// +/// In IS and AH this corresponds to the currently intersected primitive. +/// In CH this corresponds to the primitive index of the closest intersected primitive. +static __forceinline__ __device__ unsigned int optixGetPrimitiveIndex(); + +/// Returns the Sbt GAS index of the primitive associated with the current intersection. +/// +/// In IS and AH this corresponds to the currently intersected primitive. +/// In CH this corresponds to the Sbt GAS index of the closest intersected primitive. +/// In EX with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT corresponds to the sbt index within the hit GAS. Returns zero for all other exceptions. +static __forceinline__ __device__ unsigned int optixGetSbtGASIndex(); + + +/// Returns the OptixInstance::instanceId of the instance within the top level acceleration structure associated with the current intersection. +/// +/// When building an acceleration structure using OptixBuildInputInstanceArray each OptixInstance has a user supplied instanceId. +/// OptixInstance objects reference another acceleration structure. During traversal the acceleration structures are visited top down. +/// In the IS and AH programs the OptixInstance::instanceId corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceId(). +/// In CH optixGetInstanceId() returns the OptixInstance::instanceId when the hit was recorded with optixReportIntersection. 
+/// In the case where there is no OptixInstance visited, optixGetInstanceId returns ~0u +static __forceinline__ __device__ unsigned int optixGetInstanceId(); + +/// Returns the zero-based index of the instance within its instance acceleration structure associated with the current intersection. +/// +/// In the IS and AH programs the index corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceIndex(). +/// In CH optixGetInstanceIndex() returns the index when the hit was recorded with optixReportIntersection. +/// In the case where there is no OptixInstance visited, optixGetInstanceIndex returns 0 +static __forceinline__ __device__ unsigned int optixGetInstanceIndex(); + +/// Returns the 8 bit hit kind associated with the current hit. +/// +/// Use optixGetPrimitiveType() to interpret the hit kind. +/// For custom intersections (primitive type OPTIX_PRIMITIVE_TYPE_CUSTOM), +/// this is the 7-bit hitKind passed to optixReportIntersection(). +/// Hit kinds greater than 127 are reserved for built-in primitives. +/// +/// Available only in AH and CH. +static __forceinline__ __device__ unsigned int optixGetHitKind(); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType( unsigned int hitKind ); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsFrontFaceHit( unsigned int hitKind ); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsBackFaceHit( unsigned int hitKind ); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType(); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. 
+static __forceinline__ __device__ bool optixIsFrontFaceHit(); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ bool optixIsBackFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleFrontFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleBackFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsDisplacedMicromeshTriangleHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsDisplacedMicromeshTriangleFrontFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsDisplacedMicromeshTriangleBackFaceHit(); + +/// Convenience function that returns the first two attributes as floats. +/// +/// When using OptixBuildInputTriangleArray objects, during intersection the barycentric +/// coordinates are stored into the first two attribute registers. +static __forceinline__ __device__ float2 optixGetTriangleBarycentrics(); + +/// Returns the curve parameter associated with the current intersection +/// when using OptixBuildInputCurveArray objects. +static __forceinline__ __device__ float optixGetCurveParameter(); + +/// Returns the ribbon parameters along directrix (length) and generator (width) of the current intersection +/// when using OptixBuildInputCurveArray objects with curveType OPTIX_PRIMITIVE_TYPE_FLAT_QUADRATIC_BSPLINE. 
+static __forceinline__ __device__ float2 optixGetRibbonParameters(); + +/// Available in any program, it returns the current launch index within the launch dimensions specified by optixLaunch on the host. +/// +/// The raygen program is typically only launched once per launch index. +static __forceinline__ __device__ uint3 optixGetLaunchIndex(); + +/// Available in any program, it returns the dimensions of the current launch specified by optixLaunch on the host. +static __forceinline__ __device__ uint3 optixGetLaunchDimensions(); + +/// Returns the generic memory space pointer to the data region (past the header) of the currently active SBT record corresponding to the current program. +static __forceinline__ __device__ CUdeviceptr optixGetSbtDataPointer(); + +/// Throws a user exception with the given exception code (overload without exception details). +/// +/// The exception code must be in the range from 0 to 2^30 - 1. Up to 8 optional exception details can be passed. They +/// can be queried in the EX program using optixGetExceptionDetail_0() to ..._7(). +/// +/// The exception details must not be used to encode pointers to the stack since the current stack is not preserved in +/// the EX program. +/// +/// Not available in EX. +/// +/// \param[in] exceptionCode The exception code to be thrown. +static __forceinline__ __device__ void optixThrowException( int exceptionCode ); + +/// Throws a user exception with the given exception code (overload with 1 exception detail). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0 ); + +/// Throws a user exception with the given exception code (overload with 2 exception details).
+/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1 ); + +/// Throws a user exception with the given exception code (overload with 3 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2 ); + +/// Throws a user exception with the given exception code (overload with 4 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3 ); + +/// Throws a user exception with the given exception code (overload with 5 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4 ); + +/// Throws a user exception with the given exception code (overload with 6 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5 ); + +/// Throws a user exception with the given exception code (overload with 7 exception details). 
+/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5, + unsigned int exceptionDetail6 ); + +/// Throws a user exception with the given exception code (overload with 8 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5, + unsigned int exceptionDetail6, + unsigned int exceptionDetail7 ); + +/// Returns the exception code. +/// +/// Only available in EX. +static __forceinline__ __device__ int optixGetExceptionCode(); + +/// Returns the 32-bit exception detail at slot 0. +/// +/// The behavior is undefined if the exception is not a user exception, or the used overload #optixThrowException() did +/// not provide the queried exception detail. +/// +/// Only available in EX. +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_0(); + +/// Returns the 32-bit exception detail at slot 1. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_1(); + +/// Returns the 32-bit exception detail at slot 2. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_2(); + +/// Returns the 32-bit exception detail at slot 3. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_3(); + +/// Returns the 32-bit exception detail at slot 4. 
+/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_4(); + +/// Returns the 32-bit exception detail at slot 5. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_5(); + +/// Returns the 32-bit exception detail at slot 6. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_6(); + +/// Returns the 32-bit exception detail at slot 7. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_7(); + +/// Returns the invalid traversable handle for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE. +/// +/// Returns zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixTraversableHandle optixGetExceptionInvalidTraversable(); + +/// Returns the invalid sbt offset for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT and OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT. +/// +/// Returns zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ int optixGetExceptionInvalidSbtOffset(); + +/// Returns the invalid ray for exceptions with exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. +/// Exceptions of type OPTIX_EXCEPTION_CODE_INVALID_RAY are thrown when one or more values that were +/// passed into optixTrace are either inf or nan. +/// +/// OptixInvalidRayExceptionDetails::rayTime will always be 0 if OptixPipelineCompileOptions::usesMotionBlur is 0. +/// Values in the returned struct are all zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixInvalidRayExceptionDetails optixGetExceptionInvalidRay(); + +/// Returns information about an exception with code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH. 
+/// +/// Exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH are called when the number of +/// arguments that were passed into a call to optixDirectCall or optixContinuationCall does not match +/// the number of parameters of the callable that is called. +/// Note that the parameters are packed by OptiX into individual 32 bit values, so the number of +/// expected and passed values may not correspond to the number of arguments passed into optixDirectCall +/// or optixContinuationCall. +/// +/// Values in the returned struct are all zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixParameterMismatchExceptionDetails optixGetExceptionParameterMismatch(); + +/// Returns a string that includes information about the source location that caused the current exception. +/// +/// The source location is only available for exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH, +/// OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE, OPTIX_EXCEPTION_CODE_INVALID_RAY, and for user exceptions. +/// Line information needs to be present in the input PTX and OptixModuleCompileOptions::debugLevel +/// may not be set to OPTIX_COMPILE_DEBUG_LEVEL_NONE. +/// +/// Returns a NULL pointer if no line information is available. +/// +/// Only available in EX. +static __forceinline__ __device__ char* optixGetExceptionLineInfo(); + +/// Creates a call to the direct callable program at the specified SBT entry. +/// +/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameDC in the +/// module specified by OptixProgramGroupCallables::moduleDC. +/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). +/// +/// Behavior is undefined if there is no direct callable program at the specified SBT entry. 
+/// +/// Behavior is undefined if the number of arguments that are being passed in does not match the number of +/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH +/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. +/// +/// \param[in] sbtIndex The offset of the SBT entry of the direct callable program to call relative to OptixShaderBindingTable::callablesRecordBase. +/// \param[in] args The arguments to pass to the direct callable program. +template <typename ReturnT, typename... ArgTypes> +static __forceinline__ __device__ ReturnT optixDirectCall( unsigned int sbtIndex, ArgTypes... args ); + + +/// Creates a call to the continuation callable program at the specified SBT entry. +/// +/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameCC in the +/// module specified by OptixProgramGroupCallables::moduleCC. +/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). +/// As opposed to direct callable programs, continuation callable programs are allowed to call optixTrace recursively. +/// +/// Behavior is undefined if there is no continuation callable program at the specified SBT entry. +/// +/// Behavior is undefined if the number of arguments that are being passed in does not match the number of +/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH +/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. +/// +/// \param[in] sbtIndex The offset of the SBT entry of the continuation callable program to call relative to OptixShaderBindingTable::callablesRecordBase. +/// \param[in] args The arguments to pass to the continuation callable program. 
+template <typename ReturnT, typename... ArgTypes> +static __forceinline__ __device__ ReturnT optixContinuationCall( unsigned int sbtIndex, ArgTypes... args ); + + +/// optixTexFootprint2D calculates the footprint of a corresponding 2D texture fetch (non-mipmapped). +/// +/// On Turing and subsequent architectures, a texture footprint instruction allows user programs to +/// determine the set of texels that would be accessed by an equivalent filtered texture lookup. +/// +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. +/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. +/// +/// The texture info argument is a packed 32-bit integer with the following layout: +/// +/// texInfo[31:29] = reserved (3 bits) +/// texInfo[28:24] = miplevel count (5 bits) +/// texInfo[23:20] = log2 of tile width (4 bits) +/// texInfo[19:16] = log2 of tile height (4 bits) +/// texInfo[15:10] = reserved (6 bits) +/// texInfo[9:8] = horizontal wrap mode (2 bits) (CUaddress_mode) +/// texInfo[7:6] = vertical wrap mode (2 bits) (CUaddress_mode) +/// texInfo[5] = mipmap filter mode (1 bit) (CUfilter_mode) +/// texInfo[4:0] = maximum anisotropy (5 bits) +/// +/// Returns a 16-byte structure (as a uint4) that stores the footprint of a texture request at a +/// particular "granularity", which has the following layout: +/// +/// struct Texture2DFootprint +/// { +/// unsigned long long mask; +/// unsigned int tileY : 12; +/// unsigned int reserved1 : 4; +/// unsigned int dx : 3; +/// unsigned int dy : 3; +/// unsigned int reserved2 : 2; +/// unsigned int granularity : 4; +/// unsigned int reserved3 : 4; +/// unsigned int tileX : 12; +/// unsigned int level : 4; +/// unsigned int reserved4 : 16; +/// }; +/// +/// The granularity indicates the size of texel groups that are represented by an 8x8 bitmask. 
For +/// example, a granularity of 12 indicates texel groups that are 128x64 texels in size. In a +/// footprint call, The returned granularity will either be the actual granularity of the result, or +/// 0 if the footprint call was able to honor the requested granularity (the usual case). +/// +/// level is the mip level of the returned footprint. Two footprint calls are needed to get the +/// complete footprint when a texture call spans multiple mip levels. +/// +/// mask is an 8x8 bitmask of texel groups that are covered, or partially covered, by the footprint. +/// tileX and tileY give the starting position of the mask in 8x8 texel-group blocks. For example, +/// suppose a granularity of 12 (128x64 texels), and tileX=3 and tileY=4. In this case, bit 0 of the +/// mask (the low order bit) corresponds to texel group coordinates (3*8, 4*8), and texel +/// coordinates (3*8*128, 4*8*64), within the specified mip level. +/// +/// If nonzero, dx and dy specify a "toroidal rotation" of the bitmask. Toroidal rotation of a +/// coordinate in the mask simply means that its value is reduced by 8. Continuing the example from +/// above, if dx=0 and dy=0 the mask covers texel groups (3*8, 4*8) to (3*8+7, 4*8+7) inclusive. +/// If, on the other hand, dx=2, the rightmost 2 columns in the mask have their x coordinates +/// reduced by 8, and similarly for dy. +/// +/// See the OptiX SDK for sample code that illustrates how to unpack the result. +static __forceinline__ __device__ uint4 optixTexFootprint2D( unsigned long long tex, unsigned int texInfo, float x, float y, unsigned int* singleMipLevel ); + +/// optixTexFootprint2DLod calculates the footprint of a corresponding 2D texture fetch (tex2DLod) +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. 
+/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[in] level Level of detail (lod) +/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. +/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) +static __forceinline__ __device__ uint4 +optixTexFootprint2DLod( unsigned long long tex, unsigned int texInfo, float x, float y, float level, bool coarse, unsigned int* singleMipLevel ); + +/// optixTexFootprint2DGrad calculates the footprint of a corresponding 2D texture fetch (tex2DGrad) +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. +/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[in] dPdx_x Derivative of x coordinate, which determines level of detail. +/// \param[in] dPdx_y Derivative of x coordinate, which determines level of detail. +/// \param[in] dPdy_x Derivative of y coordinate, which determines level of detail. +/// \param[in] dPdy_y Derivative of y coordinate, which determines level of detail. +/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. 
+/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) +static __forceinline__ __device__ uint4 optixTexFootprint2DGrad( unsigned long long tex, + unsigned int texInfo, + float x, + float y, + float dPdx_x, + float dPdx_y, + float dPdy_x, + float dPdy_y, + bool coarse, + unsigned int* singleMipLevel ); + +/**@}*/ // end group optix_device_api + +#define __OPTIX_INCLUDE_INTERNAL_HEADERS__ + +#include "internal/optix_device_impl.h" + +#endif // OPTIX_OPTIX_DEVICE_H diff --git a/zenovis/xinxinoptix/include/optix_function_table.h b/zenovis/xinxinoptix/include/optix_function_table.h index c43e819434..215186e27a 100644 --- a/zenovis/xinxinoptix/include/optix_function_table.h +++ b/zenovis/xinxinoptix/include/optix_function_table.h @@ -22,11 +22,11 @@ /// @author NVIDIA Corporation /// @brief OptiX public API header -#ifndef __optix_optix_function_table_h__ -#define __optix_optix_function_table_h__ +#ifndef OPTIX_OPTIX_FUNCTION_TABLE_H +#define OPTIX_OPTIX_FUNCTION_TABLE_H /// The OptiX ABI version. -#define OPTIX_ABI_VERSION 68 +#define OPTIX_ABI_VERSION 84 #ifndef OPTIX_DEFINE_ABI_VERSION_ONLY @@ -104,26 +104,26 @@ typedef struct OptixFunctionTable /// \name Modules //@ { - /// See ::optixModuleCreateFromPTX(). - OptixResult ( *optixModuleCreateFromPTX )( OptixDeviceContext context, - const OptixModuleCompileOptions* moduleCompileOptions, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const char* PTX, - size_t PTXsize, - char* logString, - size_t* logStringSize, - OptixModule* module ); - - /// See ::optixModuleCreateFromPTXWithTasks(). - OptixResult ( *optixModuleCreateFromPTXWithTasks )( OptixDeviceContext context, - const OptixModuleCompileOptions* moduleCompileOptions, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const char* PTX, - size_t PTXsize, - char* logString, - size_t* logStringSize, - OptixModule* module, - OptixTask* firstTask ); + /// See ::optixModuleCreate(). 
+ OptixResult ( *optixModuleCreate )( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* input, + size_t inputSize, + char* logString, + size_t* logStringSize, + OptixModule* module ); + + /// See ::optixModuleCreateWithTasks(). + OptixResult ( *optixModuleCreateWithTasks )( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* input, + size_t inputSize, + char* logString, + size_t* logStringSize, + OptixModule* module, + OptixTask* firstTask ); /// See ::optixModuleGetCompilationState(). OptixResult ( *optixModuleGetCompilationState )( OptixModule module, OptixModuleCompileState* state ); @@ -164,7 +164,7 @@ typedef struct OptixFunctionTable OptixResult ( *optixProgramGroupDestroy )( OptixProgramGroup programGroup ); /// See ::optixProgramGroupGetStackSize(). - OptixResult ( *optixProgramGroupGetStackSize )( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ); + OptixResult ( *optixProgramGroupGetStackSize )( OptixProgramGroup programGroup, OptixStackSizes* stackSizes, OptixPipeline pipeline ); //@ } /// \name Pipeline @@ -243,6 +243,11 @@ typedef struct OptixFunctionTable size_t outputBufferSizeInBytes, OptixTraversableHandle* outputHandle ); + OptixResult ( *optixAccelEmitProperty )( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle handle, + const OptixAccelEmitDesc* emittedProperty ); + /// See ::optixConvertPointerToTraversableHandle(). OptixResult ( *optixConvertPointerToTraversableHandle )( OptixDeviceContext onDevice, CUdeviceptr pointer, @@ -261,8 +266,8 @@ typedef struct OptixFunctionTable const OptixMicromapBuffers* buffers ); /// See ::optixOpacityMicromapArrayGetRelocationInfo(). 
- OptixResult ( *optixOpacityMicromapArrayGetRelocationInfo )( OptixDeviceContext context, - CUdeviceptr opacityMicromapArray, + OptixResult ( *optixOpacityMicromapArrayGetRelocationInfo )( OptixDeviceContext context, + CUdeviceptr opacityMicromapArray, OptixRelocationInfo* info ); /// See ::optixOpacityMicromapArrayRelocate(). @@ -272,8 +277,16 @@ typedef struct OptixFunctionTable CUdeviceptr targetOpacityMicromapArray, size_t targetOpacityMicromapArraySizeInBytes ); - void ( *reserved1 )( void ); - void ( *reserved2 )( void ); + /// See ::optixDisplacementMicromapArrayComputeMemoryUsage(). + OptixResult ( *optixDisplacementMicromapArrayComputeMemoryUsage )( OptixDeviceContext context, + const OptixDisplacementMicromapArrayBuildInput* buildInput, + OptixMicromapBufferSizes* bufferSizes ); + + /// See ::optixDisplacementMicromapArrayBuild(). + OptixResult ( *optixDisplacementMicromapArrayBuild )( OptixDeviceContext context, + CUstream stream, + const OptixDisplacementMicromapArrayBuildInput* buildInput, + const OptixMicromapBuffers* buffers ); //@ } /// \name Launch @@ -354,7 +367,7 @@ typedef struct OptixFunctionTable } OptixFunctionTable; -/*@}*/ // end group optix_function_table +/**@}*/ // end group optix_function_table #ifdef __cplusplus } @@ -362,4 +375,4 @@ typedef struct OptixFunctionTable #endif /* OPTIX_DEFINE_ABI_VERSION_ONLY */ -#endif /* __optix_optix_function_table_h__ */ +#endif /* OPTIX_OPTIX_FUNCTION_TABLE_H */ diff --git a/zenovis/xinxinoptix/include/optix_function_table_definition.h b/zenovis/xinxinoptix/include/optix_function_table_definition.h index 2d6f0bf6de..e1e11b307d 100644 --- a/zenovis/xinxinoptix/include/optix_function_table_definition.h +++ b/zenovis/xinxinoptix/include/optix_function_table_definition.h @@ -22,8 +22,8 @@ /// @author NVIDIA Corporation /// @brief OptiX public API header -#ifndef __optix_optix_function_table_definition_h__ -#define __optix_optix_function_table_definition_h__ +#ifndef 
OPTIX_OPTIX_FUNCTION_TABLE_DEFINITION_H +#define OPTIX_OPTIX_FUNCTION_TABLE_DEFINITION_H #include "optix_function_table.h" @@ -38,12 +38,12 @@ extern "C" { /// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly /// one translation unit. This can be achieved by including this header file in that translation /// unit. -inline OptixFunctionTable g_optixFunctionTable; +OptixFunctionTable g_optixFunctionTable; -/*@}*/ // end group optix_function_table +/**@}*/ // end group optix_function_table #ifdef __cplusplus } #endif -#endif // __optix_optix_function_table_definition_h__ +#endif // OPTIX_OPTIX_FUNCTION_TABLE_DEFINITION_H diff --git a/zenovis/xinxinoptix/include/optix_host.h b/zenovis/xinxinoptix/include/optix_host.h index 2ed0c824fd..3efb53168c 100644 --- a/zenovis/xinxinoptix/include/optix_host.h +++ b/zenovis/xinxinoptix/include/optix_host.h @@ -1,4 +1,3 @@ - /* * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. * @@ -19,20 +18,1023 @@ * SUCH DAMAGES */ -/** - * @file optix_host.h - * @author NVIDIA Corporation - * @brief OptiX public API - * - * OptiX public API Reference - Host side - */ +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX host include file -- includes the host api if compiling host code. +/// For the math library routines include optix_math.h -#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) -# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ -# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ +#ifndef OPTIX_OPTIX_HOST_H +#define OPTIX_OPTIX_HOST_H + +#include "optix_types.h" +#if !defined( OPTIX_DONT_INCLUDE_CUDA ) +// If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver types must be defined through other +// means before including optix headers. 
+#include <cuda.h> #endif -#include "optix_7_host.h" -#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ ) -# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ -# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ + + + +#ifdef __cplusplus +extern "C" { #endif + +/// \defgroup optix_host_api Host API +/// \brief OptiX Host API + +/// \defgroup optix_host_api_error_handling Error handling +/// \ingroup optix_host_api +//@{ + +/// Returns a string containing the name of an error code in the enum. +/// +/// Output is a string representation of the enum. For example "OPTIX_SUCCESS" for +/// OPTIX_SUCCESS and "OPTIX_ERROR_INVALID_VALUE" for OPTIX_ERROR_INVALID_VALUE. +/// +/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. +/// +/// \param[in] result OptixResult enum to generate string name for +/// +/// \see #optixGetErrorString +const char* optixGetErrorName( OptixResult result ); + +/// Returns the description string for an error code. +/// +/// Output is a string description of the enum. For example "Success" for +/// OPTIX_SUCCESS and "Invalid value" for OPTIX_ERROR_INVALID_VALUE. +/// +/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. +/// +/// \param[in] result OptixResult enum to generate string description for +/// +/// \see #optixGetErrorName +const char* optixGetErrorString( OptixResult result ); + +//@} +/// \defgroup optix_host_api_device_context Device context +/// \ingroup optix_host_api +//@{ + +/// Create a device context associated with the CUDA context specified with 'fromContext'. +/// +/// If zero is specified for 'fromContext', OptiX will use the current CUDA context. The +/// CUDA context should be initialized before calling optixDeviceContextCreate. 
+/// +/// \param[in] fromContext +/// \param[in] options +/// \param[out] context +/// \return +/// - OPTIX_ERROR_CUDA_NOT_INITIALIZED +/// If using zero for 'fromContext' and CUDA has not been initialized yet on the calling +/// thread. +/// - OPTIX_ERROR_CUDA_ERROR +/// CUDA operation failed. +/// - OPTIX_ERROR_HOST_OUT_OF_MEMORY +/// Heap allocation failed. +/// - OPTIX_ERROR_INTERNAL_ERROR +/// Internal error +OptixResult optixDeviceContextCreate( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context ); + +/// Destroys all CPU and GPU state associated with the device. +/// +/// It will attempt to block on CUDA streams that have launch work outstanding. +/// +/// Any API objects, such as OptixModule and OptixPipeline, not already destroyed will be +/// destroyed. +/// +/// Thread safety: A device context must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixDeviceContextDestroy( OptixDeviceContext context ); + +/// Query properties of a device context. +/// +/// \param[in] context the device context to query the property for +/// \param[in] property the property to query +/// \param[out] value pointer to the returned value +/// \param[in] sizeInBytes size of output +OptixResult optixDeviceContextGetProperty( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes ); + +/// Sets the current log callback method. +/// +/// See #OptixLogCallback for more details. +/// +/// Thread safety: It is guaranteed that the callback itself (callbackFunction and callbackData) are updated atomically. +/// It is not guaranteed that the callback itself (callbackFunction and callbackData) and the callbackLevel are updated +/// atomically. It is unspecified when concurrent API calls using the same context start to make use of the new +/// callback method. 
+/// +/// \param[in] context the device context +/// \param[in] callbackFunction the callback function to call +/// \param[in] callbackData pointer to data passed to callback function while invoking it +/// \param[in] callbackLevel callback level +OptixResult optixDeviceContextSetLogCallback( OptixDeviceContext context, + OptixLogCallback callbackFunction, + void* callbackData, + unsigned int callbackLevel ); + +/// Enables or disables the disk cache. +/// +/// If caching was previously disabled, enabling it will attempt to initialize +/// the disk cache database using the currently configured cache location. An +/// error will be returned if initialization fails. +/// +/// Note that no in-memory cache is used, so no caching behavior will be observed if the disk cache +/// is disabled. +/// +/// The cache can be disabled by setting the environment variable OPTIX_CACHE_MAXSIZE=0. +/// The environment variable takes precedence over this setting. +/// See #optixDeviceContextSetCacheDatabaseSizes for additional information. +/// +/// Note that the disk cache can be disabled by the environment variable, but it cannot be enabled +/// via the environment if it is disabled via the API. +/// +/// \param[in] context the device context +/// \param[in] enabled 1 to enable, 0 to disable +OptixResult optixDeviceContextSetCacheEnabled( OptixDeviceContext context, + int enabled ); + +/// Sets the location of the disk cache. +/// +/// The location is specified by a directory. This directory should not be used for other purposes +/// and will be created if it does not exist. An error will be returned if it is not possible to +/// create the disk cache at the specified location for any reason (e.g., the path is invalid or +/// the directory is not writable). Caching will be disabled if the disk cache cannot be +/// initialized in the new location. If caching is disabled, no error will be returned until caching +/// is enabled. 
If the disk cache is located on a network file share, behavior is undefined. +/// +/// The location of the disk cache can be overridden with the environment variable OPTIX_CACHE_PATH. +/// The environment variable takes precedence over this setting. +/// +/// The default location depends on the operating system: +/// - Windows: %LOCALAPPDATA%\\NVIDIA\\OptixCache +/// - Linux: /var/tmp/OptixCache_\<username\> (or /tmp/OptixCache_\<username\> if the first choice is not usable), +/// the underscore and username suffix are omitted if the username cannot be obtained +/// - MacOS X: /Library/Application Support/NVIDIA/OptixCache +/// +/// \param[in] context the device context +/// \param[in] location directory of disk cache +OptixResult optixDeviceContextSetCacheLocation( OptixDeviceContext context, const char* location ); + +/// Sets the low and high water marks for disk cache garbage collection. +/// +/// Garbage collection is triggered when a new entry is written to the cache and +/// the current cache data size plus the size of the cache entry that is about +/// to be inserted exceeds the high water mark. Garbage collection proceeds until +/// the size reaches the low water mark. Garbage collection will always free enough +/// space to insert the new entry without exceeding the low water mark. Setting +/// either limit to zero will disable garbage collection. An error will be returned +/// if both limits are non-zero and the high water mark is smaller than the low water mark. +/// +/// Note that garbage collection is performed only on writes to the disk cache. No garbage +/// collection is triggered on disk cache initialization or immediately when calling this function, +/// but on subsequent inserting of data into the database. +/// +/// If the size of a compiled module exceeds the value configured for the high water +/// mark and garbage collection is enabled, the module will not be added to the cache +/// and a warning will be added to the log. 
+/// +/// The high water mark can be overridden with the environment variable OPTIX_CACHE_MAXSIZE. +/// The environment variable takes precedence over the function parameters. The low water mark +/// will be set to half the value of OPTIX_CACHE_MAXSIZE. Setting OPTIX_CACHE_MAXSIZE to 0 will +/// disable the disk cache, but will not alter the contents of the cache. Negative and non-integer +/// values will be ignored. +/// +/// \param[in] context the device context +/// \param[in] lowWaterMark the low water mark +/// \param[in] highWaterMark the high water mark +OptixResult optixDeviceContextSetCacheDatabaseSizes( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark ); + +/// Indicates whether the disk cache is enabled or disabled. +/// +/// \param[in] context the device context +/// \param[out] enabled 1 if enabled, 0 if disabled +OptixResult optixDeviceContextGetCacheEnabled( OptixDeviceContext context, int* enabled ); +/// Returns the location of the disk cache. If the cache has been disabled by setting the environment +/// variable OPTIX_CACHE_MAXSIZE=0, this function will return an empty string. +/// +/// \param[in] context the device context +/// \param[out] location directory of disk cache, null terminated if locationSize > 0 +/// \param[in] locationSize locationSize +OptixResult optixDeviceContextGetCacheLocation( OptixDeviceContext context, char* location, size_t locationSize ); + +/// Returns the low and high water marks for disk cache garbage collection. If the cache has been disabled by +/// setting the environment variable OPTIX_CACHE_MAXSIZE=0, this function will return 0 for the low and high +/// water marks. 
+/// +/// \param[in] context the device context +/// \param[out] lowWaterMark the low water mark +/// \param[out] highWaterMark the high water mark +OptixResult optixDeviceContextGetCacheDatabaseSizes( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark ); + +//@} +/// \defgroup optix_host_api_pipelines Pipelines +/// \ingroup optix_host_api +//@{ + +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. +/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// \param[in] context +/// \param[in] pipelineCompileOptions +/// \param[in] pipelineLinkOptions +/// \param[in] programGroups array of ProgramGroup objects +/// \param[in] numProgramGroups number of ProgramGroup objects +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. 
+/// \param[in,out] logStringSize +/// \param[out] pipeline +OptixResult optixPipelineCreate( OptixDeviceContext context, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixPipelineLinkOptions* pipelineLinkOptions, + const OptixProgramGroup* programGroups, + unsigned int numProgramGroups, + char* logString, + size_t* logStringSize, + OptixPipeline* pipeline ); + +/// Thread safety: A pipeline must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixPipelineDestroy( OptixPipeline pipeline ); + +/// Sets the stack sizes for a pipeline. +/// +/// Users are encouraged to see the programming guide and the implementations of the helper functions +/// to understand how to construct the stack sizes based on their particular needs. +/// +/// If this method is not used, an internal default implementation is used. The default implementation is correct (but +/// not necessarily optimal) as long as the maximum depth of call trees of CC and DC programs is at most 2 and no motion transforms are used. +/// +/// The maxTraversableGraphDepth responds to the maximal number of traversables visited when calling trace. +/// Every acceleration structure and motion transform count as one level of traversal. +/// E.g., for a simple IAS (instance acceleration structure) -> GAS (geometry acceleration structure) +/// traversal graph, the maxTraversableGraphDepth is two. +/// For IAS -> MT (motion transform) -> GAS, the maxTraversableGraphDepth is three. +/// Note that it does not matter whether a IAS or GAS has motion or not, it always counts as one. +/// Launching optix with exceptions turned on (see #OPTIX_EXCEPTION_FLAG_TRACE_DEPTH) will throw an exception +/// if the specified maxTraversableGraphDepth is too small. +/// +/// \param[in] pipeline The pipeline to configure the stack size for. 
+/// \param[in] directCallableStackSizeFromTraversal The direct stack size requirement for direct callables invoked from IS or AH. +/// \param[in] directCallableStackSizeFromState The direct stack size requirement for direct callables invoked from RG, MS, or CH. +/// \param[in] continuationStackSize The continuation stack requirement. +/// \param[in] maxTraversableGraphDepth The maximum depth of a traversable graph passed to trace. +OptixResult optixPipelineSetStackSize( OptixPipeline pipeline, + unsigned int directCallableStackSizeFromTraversal, + unsigned int directCallableStackSizeFromState, + unsigned int continuationStackSize, + unsigned int maxTraversableGraphDepth ); + +//@} +/// \defgroup optix_host_api_modules Modules +/// \ingroup optix_host_api +//@{ + +/// Compiling programs into a module. These programs can be passed in as either PTX or OptiX-IR. +/// +/// See the Programming Guide for details, as well as how to generate these encodings from CUDA sources. +/// +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. +/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. 
+/// +/// \param[in] context +/// \param[in] moduleCompileOptions +/// \param[in] pipelineCompileOptions All modules in a pipeline need to use the same values for the pipeline compile options. +/// \param[in] input Pointer to the input code. +/// \param[in] inputSize Parsing proceeds up to inputSize characters. Or, when reading PTX input, the first NUL byte, whichever occurs first. +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] module +/// +/// \return OPTIX_ERROR_INVALID_VALUE - context is 0, moduleCompileOptions is 0, pipelineCompileOptions is 0, input is 0, module is 0. +OptixResult optixModuleCreate( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* input, + size_t inputSize, + char* logString, + size_t* logStringSize, + OptixModule* module ); + +/// This function is designed to do just enough work to create the OptixTask return +/// parameter and is expected to be fast enough to run without needing parallel execution. A +/// single thread could generate all the OptixTask objects for further processing in a +/// work pool. +/// +/// Options are similar to #optixModuleCreate(), aside from the return parameter, +/// firstTask. +/// +/// The memory used to hold the input should be live until all tasks are finished. +/// +/// It is illegal to call #optixModuleDestroy() if any OptixTask objects are currently +/// being executed. In that case OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE will be returned. +/// +/// If an invocation of optixTaskExecute fails, the OptixModule will be marked as +/// OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE if there are outstanding tasks or +/// OPTIX_MODULE_COMPILE_STATE_FAILURE if there are no outstanding tasks.
Subsequent calls +/// to #optixTaskExecute() may execute additional work to collect compilation errors +/// generated from the input. Currently executing tasks will not necessarily be terminated +/// immediately but at the next opportunity. +/// +/// Logging will continue to be directed to the logger installed with the +/// OptixDeviceContext. If logString is provided to #optixModuleCreateWithTasks(), +/// it will contain all the compiler feedback from all executed tasks. The lifetime of the +/// memory pointed to by logString should extend from calling +/// #optixModuleCreateWithTasks() to when the compilation state is either +/// OPTIX_MODULE_COMPILE_STATE_FAILURE or OPTIX_MODULE_COMPILE_STATE_COMPLETED. OptiX will +/// not write to the logString outside of execution of +/// #optixModuleCreateWithTasks() or #optixTaskExecute(). If the compilation state +/// is OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE and no further execution of +/// #optixTaskExecute() is performed the logString may be reclaimed by the application +/// before calling #optixModuleDestroy(). The contents of logString will contain output +/// from currently completed tasks. +/// +/// All OptixTask objects associated with a given OptixModule will be cleaned up when +/// #optixModuleDestroy() is called regardless of whether the compilation was successful +/// or not. If the compilation state is OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, any +/// unstarted OptixTask objects do not need to be executed though there is no harm doing +/// so. +/// +/// \see #optixModuleCreate +OptixResult optixModuleCreateWithTasks( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* input, + size_t inputSize, + char* logString, + size_t* logStringSize, + OptixModule* module, + OptixTask* firstTask ); + +/// When creating a module with tasks, the current state of the module can be queried +/// using this function.
+/// +/// Thread safety: Safe to call from any thread until optixModuleDestroy is called. +/// +/// \see #optixModuleCreateWithTasks +OptixResult optixModuleGetCompilationState( OptixModule module, OptixModuleCompileState* state ); + +/// Call for OptixModule objects created with optixModuleCreate and optixModuleDeserialize. +/// +/// Modules must not be destroyed while they are still used by any program group. +/// +/// Thread safety: A module must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixModuleDestroy( OptixModule module ); + +/// Returns a module containing the intersection program for the built-in primitive type specified +/// by the builtinISOptions. This module must be used as the moduleIS for the OptixProgramGroupHitgroup +/// in any SBT record for that primitive type. (The entryFunctionNameIS should be null.) +OptixResult optixBuiltinISModuleGet( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixBuiltinISOptions* builtinISOptions, + OptixModule* builtinModule ); + +//@} +/// \defgroup optix_host_api_tasks Tasks +/// \ingroup optix_host_api +//@{ + +/// Each OptixTask should be executed with #optixTaskExecute(). If additional parallel +/// work is found, new OptixTask objects will be returned in additionalTasks along with +/// the number of additional tasks in numAdditionalTasksCreated. The parameter +/// additionalTasks should point to a user allocated array of minimum size +/// maxNumAdditionalTasks. OptiX can generate up to maxNumAdditionalTasks additional tasks. +/// +/// Each task can be executed in parallel and in any order. +/// +/// Thread safety: Safe to call from any thread until #optixModuleDestroy() is called for +/// any associated task.
+/// +/// \see #optixModuleCreateWithTasks +/// +/// \param[in] task the OptixTask to execute +/// \param[in] additionalTasks pointer to array of OptixTask objects to be filled in +/// \param[in] maxNumAdditionalTasks maximum number of additional OptixTask objects +/// \param[out] numAdditionalTasksCreated number of OptixTask objects created by OptiX and written into #additionalTasks +OptixResult optixTaskExecute( OptixTask task, OptixTask* additionalTasks, unsigned int maxNumAdditionalTasks, unsigned int* numAdditionalTasksCreated ); + +//@} +/// \defgroup optix_host_api_program_groups Program groups +/// \ingroup optix_host_api +//@{ + +/// Returns the stack sizes for the given program group. When programs in this \p programGroup are relying on external functions, +/// the corresponding stack sizes can only be correctly retrieved when all functions are known after linking, i.e. when a pipeline +/// has been created. When \p pipeline is set to NULL, the stack size will be calculated excluding external functions. In this case +/// a warning will be issued if external functions are referenced by the OptixModule. +/// +/// \param[in] programGroup the program group +/// \param[out] stackSizes the corresponding stack sizes +/// \param[in] pipeline considering the program group within the given pipeline, can be NULL +OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes, OptixPipeline pipeline ); + +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. 
+/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. +/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// Creates numProgramGroups OptixProgramGroup objects from the specified +/// OptixProgramGroupDesc array. The size of the arrays must match. +/// +/// \param[in] context +/// \param[in] programDescriptions N * OptixProgramGroupDesc +/// \param[in] numProgramGroups N +/// \param[in] options +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] programGroups +OptixResult optixProgramGroupCreate( OptixDeviceContext context, + const OptixProgramGroupDesc* programDescriptions, + unsigned int numProgramGroups, + const OptixProgramGroupOptions* options, + char* logString, + size_t* logStringSize, + OptixProgramGroup* programGroups ); + +/// Thread safety: A program group must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixProgramGroupDestroy( OptixProgramGroup programGroup ); + +//@} +/// \defgroup optix_host_api_launches Launches +/// \ingroup optix_host_api +//@{ + +/// Where the magic happens. +/// +/// The stream and pipeline must belong to the same device context. Multiple launches +/// may be issued in parallel from multiple threads to different streams. +/// +/// pipelineParamsSize number of bytes are copied from the device memory pointed to by +/// pipelineParams before launch.
It is an error if pipelineParamsSize is greater than the +/// size of the variable declared in modules and identified by +/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. If the launch params +/// variable was optimized out or not found in the modules linked to the pipeline then +/// the pipelineParams and pipelineParamsSize parameters are ignored. +/// +/// sbt points to the shader binding table, which defines shader +/// groupings and their resources. See the SBT spec. +/// +/// \param[in] pipeline +/// \param[in] stream +/// \param[in] pipelineParams +/// \param[in] pipelineParamsSize +/// \param[in] sbt +/// \param[in] width number of elements to compute +/// \param[in] height number of elements to compute +/// \param[in] depth number of elements to compute +/// +/// Thread safety: In the current implementation concurrent launches to the same pipeline are not +/// supported. Concurrent launches require separate OptixPipeline objects. +OptixResult optixLaunch( OptixPipeline pipeline, + CUstream stream, + CUdeviceptr pipelineParams, + size_t pipelineParamsSize, + const OptixShaderBindingTable* sbt, + unsigned int width, + unsigned int height, + unsigned int depth ); + +/// \param[in] programGroup the program group containing the program(s) +/// \param[out] sbtRecordHeaderHostPointer the result sbt record header +OptixResult optixSbtRecordPackHeader( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer ); + +//@} +/// \defgroup optix_host_api_acceleration_structures Acceleration structures +/// \ingroup optix_host_api +//@{ + +/// \param[in] context +/// \param[in] accelOptions options for the accel build +/// \param[in] buildInputs an array of OptixBuildInput objects +/// \param[in] numBuildInputs number of elements in buildInputs (must be at least 1) +/// \param[out] bufferSizes fills in buffer sizes +OptixResult optixAccelComputeMemoryUsage( OptixDeviceContext context, + const OptixAccelBuildOptions* accelOptions, + const 
OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + OptixAccelBufferSizes* bufferSizes ); + +/// \param[in] context +/// \param[in] stream +/// \param[in] accelOptions accel options +/// \param[in] buildInputs an array of OptixBuildInput objects +/// \param[in] numBuildInputs must be >= 1 for GAS, and == 1 for IAS +/// \param[in] tempBuffer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT +/// \param[in] tempBufferSizeInBytes +/// \param[in] outputBuffer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT +/// \param[in] outputBufferSizeInBytes +/// \param[out] outputHandle +/// \param[in] emittedProperties types of requested properties and output buffers +/// \param[in] numEmittedProperties number of post-build properties to populate (may be zero) +OptixResult optixAccelBuild( OptixDeviceContext context, + CUstream stream, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + CUdeviceptr tempBuffer, + size_t tempBufferSizeInBytes, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle, + const OptixAccelEmitDesc* emittedProperties, + unsigned int numEmittedProperties ); + +/// Obtain relocation information, stored in OptixRelocationInfo, for a given context +/// and acceleration structure's traversable handle. +/// +/// The relocation information can be passed to optixCheckRelocationCompatibility to +/// determine if an acceleration structure, referenced by 'handle', can be relocated to a +/// different device's memory space (see #optixCheckRelocationCompatibility). +/// +/// When used with optixAccelRelocate, it provides data necessary for doing the relocation. +/// +/// If the acceleration structure data associated with 'handle' is copied multiple times, +/// the same OptixRelocationInfo can also be used on all copies. 
+/// +/// \param[in] context +/// \param[in] handle +/// \param[out] info +/// \return OPTIX_ERROR_INVALID_VALUE will be returned for traversable handles that are not from +/// acceleration structure builds. +OptixResult optixAccelGetRelocationInfo( OptixDeviceContext context, OptixTraversableHandle handle, OptixRelocationInfo* info ); + +/// Checks if an optix data structure built using another OptixDeviceContext (that was +/// used to fill in 'info') is compatible with the OptixDeviceContext specified in the +/// 'context' parameter. +/// +/// Any device is always compatible with itself. +/// +/// \param[in] context +/// \param[in] info +/// \param[out] compatible If OPTIX_SUCCESS is returned 'compatible' will have the value of either: +/// - 0: This context is not compatible with the optix data structure associated with 'info'. +/// - 1: This context is compatible. +OptixResult optixCheckRelocationCompatibility( OptixDeviceContext context, const OptixRelocationInfo* info, int* compatible ); + +/// optixAccelRelocate is called to update the acceleration structure after it has been +/// relocated. Relocation is necessary when the acceleration structure's location in device +/// memory has changed. optixAccelRelocate does not copy the memory. This function only +/// operates on the relocated memory whose new location is specified by 'targetAccel'. +/// optixAccelRelocate also returns the new OptixTraversableHandle associated with +/// 'targetAccel'. The original memory (source) is not required to be valid, only the +/// OptixRelocationInfo. +/// +/// Before calling optixAccelRelocate, optixCheckRelocationCompatibility should be +/// called to ensure the copy will be compatible with the destination device context. +/// +/// The memory pointed to by 'targetAccel' should be allocated with the same size as the +/// source acceleration. Similar to the 'outputBuffer' used in optixAccelBuild, this +/// pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. 
+/// +/// The memory in 'targetAccel' must be allocated as long as the accel is in use. +/// +/// The instance traversables referenced by an IAS and the +/// micromaps referenced by a triangle GAS may themselves require relocation. +/// 'relocateInputs' and 'numRelocateInputs' should be used to specify the relocated +/// traversables and micromaps. After relocation, the relocated accel will reference +/// these relocated traversables and micromaps instead of their sources. +/// The number of relocate inputs 'numRelocateInputs' must match the number of build +/// inputs 'numBuildInputs' used to build the source accel. Relocation inputs +/// correspond with build inputs used to build the source accel and should appear in +/// the same order (see #optixAccelBuild). +/// 'relocateInputs' and 'numRelocateInputs' may be zero, preserving any references +/// to traversables and micromaps from the source accel. +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] info +/// \param[in] relocateInputs +/// \param[in] numRelocateInputs +/// \param[in] targetAccel +/// \param[in] targetAccelSizeInBytes +/// \param[out] targetHandle +OptixResult optixAccelRelocate( OptixDeviceContext context, + CUstream stream, + const OptixRelocationInfo* info, + const OptixRelocateInput* relocateInputs, + size_t numRelocateInputs, + CUdeviceptr targetAccel, + size_t targetAccelSizeInBytes, + OptixTraversableHandle* targetHandle ); + +/// After building an acceleration structure, it can be copied in a compacted form to reduce +/// memory. In order to be compacted, OPTIX_BUILD_FLAG_ALLOW_COMPACTION must be supplied in +/// OptixAccelBuildOptions::buildFlags passed to optixAccelBuild. +/// +/// 'outputBuffer' is the pointer to where the compacted acceleration structure will be +/// written. This pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. 
+/// +/// The size of the memory specified in 'outputBufferSizeInBytes' should be at least the +/// value computed using the OPTIX_PROPERTY_TYPE_COMPACTED_SIZE that was reported during +/// optixAccelBuild. +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] inputHandle +/// \param[in] outputBuffer +/// \param[in] outputBufferSizeInBytes +/// \param[out] outputHandle +OptixResult optixAccelCompact( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle inputHandle, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle ); + +/// Emit a single property after an acceleration structure was built. +/// The result buffer of the 'emittedProperty' needs to be large enough to hold the +/// requested property (\see #OptixAccelPropertyType). +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] handle +/// \param[in] emittedProperty type of requested property and output buffer +OptixResult optixAccelEmitProperty( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle handle, + const OptixAccelEmitDesc* emittedProperty ); + +/// \param[in] onDevice +/// \param[in] pointer pointer to traversable allocated in OptixDeviceContext. This pointer must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT +/// \param[in] traversableType Type of OptixTraversableHandle to create +/// \param[out] traversableHandle traversable handle. traversableHandle must be in host memory +OptixResult optixConvertPointerToTraversableHandle( OptixDeviceContext onDevice, + CUdeviceptr pointer, + OptixTraversableType traversableType, + OptixTraversableHandle* traversableHandle ); + + +/// Determine the amount of memory necessary for an Opacity Micromap Array build.
+/// +/// \param[in] context +/// \param[in] buildInput +/// \param[out] bufferSizes +OptixResult optixOpacityMicromapArrayComputeMemoryUsage( OptixDeviceContext context, + const OptixOpacityMicromapArrayBuildInput* buildInput, + OptixMicromapBufferSizes* bufferSizes ); + +/// Construct an array of Opacity Micromaps. +/// +/// Each triangle within an instance/GAS may reference one opacity micromap to give finer +/// control over alpha behavior. An opacity micromap consists of a set of 4^N micro-triangles +/// in a triangular uniform barycentric grid. Multiple opacity micromaps are collected (built) +/// into an opacity micromap array with this function. Each geometry in a GAS may bind a +/// single opacity micromap array and can use opacity micromaps from that array only. +/// +/// Each micro-triangle within an opacity micromap can be in one of four states: Transparent, +/// Opaque, Unknown-Transparent or Unknown-Opaque. During traversal, if a triangle with an +/// opacity micromap attached is intersected, the opacity micromap is queried to categorize +/// the hit as either opaque, unknown (alpha) or a miss. Geometry, ray or instance flags that +/// modify the alpha/opaque behavior are applied _after_ this opacity micromap query. +/// +/// The opacity micromap query may operate in 2-state mode (alpha testing) or 4-state mode (AHS culling), +/// depending on the opacity micromap type and ray/instance flags. When operating in 2-state +/// mode, alpha hits will not be reported, and transparent and opaque hits must be accurate.
+/// +/// \param[in] context +/// \param[in] stream +/// \param[in] buildInput a single build input object referencing many opacity micromaps +/// \param[in] buffers the buffers used for build +/// +/// \note Unlike #optixAccelBuild, this function takes no emittedProperties / numEmittedProperties arguments. +OptixResult optixOpacityMicromapArrayBuild( OptixDeviceContext context, + CUstream stream, + const OptixOpacityMicromapArrayBuildInput* buildInput, + const OptixMicromapBuffers* buffers ); + +/// Obtain relocation information, stored in OptixRelocationInfo, for a given context +/// and opacity micromap array. +/// +/// The relocation information can be passed to optixCheckRelocationCompatibility to +/// determine if an opacity micromap array, referenced by buffers, can be relocated to a +/// different device's memory space (see #optixCheckRelocationCompatibility). +/// +/// When used with optixOpacityMicromapArrayRelocate, it provides data necessary for doing the relocation. +/// +/// If the opacity micromap array data associated with 'opacityMicromapArray' is copied multiple times, +/// the same OptixRelocationInfo can also be used on all copies. +/// +/// \param[in] context +/// \param[in] opacityMicromapArray +/// \param[out] info +OptixResult optixOpacityMicromapArrayGetRelocationInfo( OptixDeviceContext context, CUdeviceptr opacityMicromapArray, OptixRelocationInfo* info ); + +/// optixOpacityMicromapArrayRelocate is called to update the opacity micromap array after it has been +/// relocated. Relocation is necessary when the opacity micromap array's location in device +/// memory has changed. optixOpacityMicromapArrayRelocate does not copy the memory. This function only +/// operates on the relocated memory whose new location is specified by 'targetOpacityMicromapArray'. +/// The original memory (source) is not required to be valid, only the +/// OptixRelocationInfo.
+/// +/// Before calling optixOpacityMicromapArrayRelocate, optixCheckRelocationCompatibility should be called +/// to ensure the copy will be compatible with the destination device context. +/// +/// The memory pointed to by 'targetOpacityMicromapArray' should be allocated with the same size as the +/// source opacity micromap array. Similar to the 'OptixMicromapBuffers::output' used in optixOpacityMicromapArrayBuild, +/// this pointer must be a multiple of OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT. +/// +/// The memory in 'targetOpacityMicromapArray' must be allocated as long as the opacity micromap array is in use. +/// +/// Note that any Acceleration Structures built using the original memory (source) as input will +/// still be associated with this original memory. To associate existing (possibly relocated) +/// Acceleration Structures with the relocated opacity micromap array, use optixAccelBuild +/// to update the existing Acceleration Structures (see OPTIX_BUILD_OPERATION_UPDATE). +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] info +/// \param[in] targetOpacityMicromapArray +/// \param[in] targetOpacityMicromapArraySizeInBytes +OptixResult optixOpacityMicromapArrayRelocate( OptixDeviceContext context, + CUstream stream, + const OptixRelocationInfo* info, + CUdeviceptr targetOpacityMicromapArray, + size_t targetOpacityMicromapArraySizeInBytes ); + +/// Determine the amount of memory necessary for a Displacement Micromap Array build. +/// +/// \param[in] context +/// \param[in] buildInput +/// \param[out] bufferSizes +OptixResult optixDisplacementMicromapArrayComputeMemoryUsage( OptixDeviceContext context, + const OptixDisplacementMicromapArrayBuildInput* buildInput, + OptixMicromapBufferSizes* bufferSizes ); + +/// FIXME +/// Construct an array of Displacement Micromaps (DMMs). +/// +/// Each triangle within a DMM GAS geometry references one DMM that specifies how to subdivide it into micro-triangles.
+/// A DMM gives a subdivision resolution into 4^N micro-triangles, and displacement values for each of the vertices +/// in the subdivided mesh. The values are combined with e.g. normal vectors, scale and bias given as AS build inputs, +/// to get the final geometry. A DMM is encoded in one or more compressed blocks, each block having displacement values +/// for a subtriangle of 64..1024 micro-triangles. +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] buildInput a single build input object referencing many DMMs +/// \param[in] buffers the buffers used for build +OptixResult optixDisplacementMicromapArrayBuild( OptixDeviceContext context, + CUstream stream, + const OptixDisplacementMicromapArrayBuildInput* buildInput, + const OptixMicromapBuffers* buffers ); + + +//@} +/// \defgroup optix_host_api_denoiser Denoiser +/// \ingroup optix_host_api +//@{ + +/// Creates a denoiser object with the given options, using built-in inference models +/// +/// 'modelKind' selects the model used for inference. +/// Inference for the built-in models can be guided (giving hints to improve image quality) with +/// albedo and normal vector images in the guide layer (see 'optixDenoiserInvoke'). +/// Use of these images must be enabled in 'OptixDenoiserOptions'. +/// +/// \param[in] context +/// \param[in] modelKind +/// \param[in] options +/// \param[out] denoiser +OptixResult optixDenoiserCreate( OptixDeviceContext context, + OptixDenoiserModelKind modelKind, + const OptixDenoiserOptions* options, + OptixDenoiser* denoiser ); + +/// Creates a denoiser object with the given options, using a provided inference model +/// +/// 'userData' and 'userDataSizeInBytes' provide a user model for inference. +/// The memory passed in userData will be accessed only during the invocation of this function and +/// can be freed after it returns. +/// The user model must export only one weight set which determines both the model kind and the +/// required set of guide images. 
+/// +/// \param[in] context +/// \param[in] userData +/// \param[in] userDataSizeInBytes +/// \param[out] denoiser +OptixResult optixDenoiserCreateWithUserModel( OptixDeviceContext context, + const void* userData, size_t userDataSizeInBytes, OptixDenoiser* denoiser ); + +/// Destroys the denoiser object and any associated host resources. +OptixResult optixDenoiserDestroy( OptixDenoiser denoiser ); + +/// Computes the GPU memory resources required to execute the denoiser. +/// +/// Memory for state and scratch buffers must be allocated with the sizes in 'returnSizes' and scratch memory +/// passed to optixDenoiserSetup, optixDenoiserInvoke, +/// optixDenoiserComputeIntensity and optixDenoiserComputeAverageColor. +/// For tiled denoising an overlap area ('overlapWindowSizeInPixels') must be added to each tile on all sides +/// which increases the amount of +/// memory needed to denoise a tile. In case of tiling use withOverlapScratchSizeInBytes for scratch memory size. +/// If only full resolution images are denoised, withoutOverlapScratchSizeInBytes can be used which is always +/// smaller than withOverlapScratchSizeInBytes. +/// +/// 'outputWidth' and 'outputHeight' is the dimension of the image to be denoised (without overlap in case tiling +/// is being used). +/// 'outputWidth' and 'outputHeight' must be greater than or equal to the dimensions passed to optixDenoiserSetup. +/// +/// \param[in] denoiser +/// \param[in] outputWidth +/// \param[in] outputHeight +/// \param[out] returnSizes +OptixResult optixDenoiserComputeMemoryResources( const OptixDenoiser denoiser, + unsigned int outputWidth, + unsigned int outputHeight, + OptixDenoiserSizes* returnSizes ); + +/// Initializes the state required by the denoiser. +/// +/// 'inputWidth' and 'inputHeight' must include overlap on both sides of the image if tiling is being used. The overlap is +/// returned by #optixDenoiserComputeMemoryResources. 
+/// For subsequent calls to #optixDenoiserInvoke 'inputWidth' and 'inputHeight' are the maximum dimensions +/// of the input layers. Dimensions of the input layers passed to #optixDenoiserInvoke may be different in each +/// invocation however they always must be smaller than 'inputWidth' and 'inputHeight' passed to #optixDenoiserSetup. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputWidth +/// \param[in] inputHeight +/// \param[in] denoiserState +/// \param[in] denoiserStateSizeInBytes +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserSetup( OptixDenoiser denoiser, + CUstream stream, + unsigned int inputWidth, + unsigned int inputHeight, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Invokes denoiser on a set of input data and produces at least one output image. +/// State memory must be available during the execution of the +/// denoiser (or until optixDenoiserSetup is called with a new state memory pointer). +/// Scratch memory passed is used only for the duration of this function. +/// Scratch and state memory sizes must have a size greater than or equal to the sizes as returned by +/// optixDenoiserComputeMemoryResources. +/// +/// 'inputOffsetX' and 'inputOffsetY' are pixel offsets in the 'inputLayers' image +/// specifying the beginning of the image without overlap. When denoising an entire image without tiling +/// there is no overlap and 'inputOffsetX' and 'inputOffsetY' must be zero. When denoising a tile which is +/// adjacent to one of the four sides of the entire image the corresponding offsets must also be zero since +/// there is no overlap at the side adjacent to the image border. +/// +/// 'guideLayer' provides additional information to the denoiser. When providing albedo and normal vector +/// guide images, the corresponding fields in the 'OptixDenoiserOptions' must be +/// enabled, see #optixDenoiserCreate. 
+/// 'guideLayer' must not be null. If a guide image in 'OptixDenoiserOptions' is not enabled, the +/// corresponding image in 'OptixDenoiserGuideLayer' is ignored. +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, a 2d flow +/// image must be given in 'OptixDenoiserGuideLayer'. +/// It describes for each pixel the flow from the previous to the current frame (a 2d vector in pixel space). +/// The denoised beauty/AOV of the previous frame must be given in 'previousOutput'. +/// If this image is not available in the first frame of a sequence, the noisy beauty/AOV from the first frame +/// and zero flow vectors could be given as a substitute. +/// For non-temporal model kinds the flow image in 'OptixDenoiserGuideLayer' is ignored. +/// 'previousOutput' and +/// 'output' may refer to the same buffer if tiling is not used, i.e. 'previousOutput' is first read by this function and later +/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. +/// In other model kinds (not temporal) 'previousOutput' is ignored. +/// +/// The beauty layer must be given as the first entry in 'layers'. +/// In AOV type model kinds (OPTIX_DENOISER_MODEL_KIND_AOV or in user defined models implementing +/// kernel-prediction) additional layers for the AOV images can be given. +/// In each layer the noisy input image is given in 'input', the denoised output is written into the +/// 'output' image. input and output images may refer to the same buffer, with the restriction that +/// the pixel formats must be identical for input and output when the blend mode is selected (see +/// #OptixDenoiserParams). +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the denoised +/// image from the previous frame must be given in 'previousOutput' in the layer. 'previousOutput' and +/// 'output' may refer to the same buffer if tiling is not used, i.e. 
'previousOutput' is first read by this function and later +/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. +/// In addition, 'previousOutputInternalGuideLayer' and 'outputInternalGuideLayer' must both be allocated regardless +/// of tiling mode. The pixel format must be OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER and the dimension must be identical +/// to the other input layers. In the first frame memory in 'previousOutputInternalGuideLayer' must either contain valid +/// data from previous denoiser runs or be set to zero. +/// In other model kinds (not temporal) 'previousOutput' and the internal guide layers are ignored. +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the +/// normal vector guide image must be given as 3d vectors in camera space. In the other models only +/// the x and y channels are used and other channels are ignored. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] params +/// \param[in] denoiserState +/// \param[in] denoiserStateSizeInBytes +/// \param[in] guideLayer +/// \param[in] layers +/// \param[in] numLayers +/// \param[in] inputOffsetX +/// \param[in] inputOffsetY +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserInvoke( OptixDenoiser denoiser, + CUstream stream, + const OptixDenoiserParams* params, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + const OptixDenoiserGuideLayer* guideLayer, + const OptixDenoiserLayer* layers, + unsigned int numLayers, + unsigned int inputOffsetX, + unsigned int inputOffsetY, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Computes the logarithmic average intensity of the given image.
The returned value 'outputIntensity' +/// is multiplied with the RGB values of the input image/tile in optixDenoiserInvoke if given in the parameter +/// OptixDenoiserParams::hdrIntensity (otherwise 'hdrIntensity' must be a null pointer). This is useful for +/// denoising HDR images which are very dark or bright. +/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get +/// consistent results. +/// +/// For each RGB pixel in the inputImage the intensity is calculated and summed if it is greater than 1e-8f: +/// intensity = log(r * 0.212586f + g * 0.715170f + b * 0.072200f). +/// The function returns 0.18 / exp(sum of intensities / number of summed pixels). +/// More details could be found in the Reinhard tonemapping paper: +/// http://www.cmap.polytechnique.fr/~peyre/cours/x2005signal/hdr_photographic.pdf +/// +/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. +/// +/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputImage +/// \param[out] outputIntensity single float +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserComputeIntensity( OptixDenoiser denoiser, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputIntensity, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Compute average logarithmic for each of the first three channels for the given image. +/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get +/// consistent results. +/// +/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. +/// +/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float. 
+/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputImage +/// \param[out] outputAverageColor three floats +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserComputeAverageColor( OptixDenoiser denoiser, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputAverageColor, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +//@} + +#ifdef __cplusplus +} +#endif + +#include "optix_function_table.h" + +#endif // OPTIX_OPTIX_HOST_H diff --git a/zenovis/xinxinoptix/include/optix_micromap.h b/zenovis/xinxinoptix/include/optix_micromap.h index 85ddfb3c26..87866ff3b9 100644 --- a/zenovis/xinxinoptix/include/optix_micromap.h +++ b/zenovis/xinxinoptix/include/optix_micromap.h @@ -34,8 +34,8 @@ * OptiX micromap helper functions. Useable on either host or device. */ -#ifndef __optix_optix_micromap_h__ -#define __optix_optix_micromap_h__ +#ifndef OPTIX_OPTIX_MICROMAP_H +#define OPTIX_OPTIX_MICROMAP_H #if !defined( OPTIX_DONT_INCLUDE_CUDA ) // If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver type float2 must be defined through other @@ -44,23 +44,31 @@ #endif #include "internal/optix_micromap_impl.h" -/// Convert a micromap triangle index to three base-triangle barycentric coordinates of the micro triangle vertices. +/// Converts a micromap triangle index to the three base-triangle barycentric coordinates of the micro-triangle vertices in the base triangle. /// The base triangle is the triangle that the micromap is applied to. +/// Note that for displaced micro-meshes this function can be used to compute a UV mapping from sub triangle to base triangle. /// -/// \param[in] microTriangleIndex Index of a micro triangle withing a micromap. -/// \param[in] subdivisionLevel Subdivision level of the micromap. -/// \param[out] baseBarycentrics0 Barycentric coordinates in the space of the base triangle of vertex 0 of the micro triangle. 
-/// \param[out] baseBarycentrics1 Barycentric coordinates in the space of the base triangle of vertex 1 of the micro triangle. -/// \param[out] baseBarycentrics2 Barycentric coordinates in the space of the base triangle of vertex 2 of the micro triangle. -OPTIX_MICROMAP_INLINE_FUNC void optixMicromapIndexToBaseBarycentrics( uint32_t microTriangleIndex, +/// \param[in] micromapTriangleIndex Index of a micro- or sub triangle within a micromap. +/// \param[in] subdivisionLevel Number of subdivision levels of the micromap or number of subdivision levels being considered (for sub triangles). +/// \param[out] baseBarycentrics0 Barycentric coordinates in the space of the base triangle of vertex 0 of the micromap triangle. +/// \param[out] baseBarycentrics1 Barycentric coordinates in the space of the base triangle of vertex 1 of the micromap triangle. +/// \param[out] baseBarycentrics2 Barycentric coordinates in the space of the base triangle of vertex 2 of the micromap triangle. +OPTIX_MICROMAP_INLINE_FUNC void optixMicromapIndexToBaseBarycentrics( uint32_t micromapTriangleIndex, uint32_t subdivisionLevel, float2& baseBarycentrics0, float2& baseBarycentrics1, float2& baseBarycentrics2 ) { - optix_impl:: - micro2bary( microTriangleIndex, subdivisionLevel, baseBarycentrics0, baseBarycentrics1, baseBarycentrics2 ); + optix_impl::micro2bary( micromapTriangleIndex, subdivisionLevel, baseBarycentrics0, baseBarycentrics1, baseBarycentrics2 ); } +/// Maps barycentrics in the space of the base triangle to barycentrics of a micro triangle. +/// The vertices of the micro triangle are defined by its barycentrics in the space of the base triangle. +/// These can be queried for a DMM hit by using optixGetMicroTriangleBarycentricsData(). 
+OPTIX_MICROMAP_INLINE_FUNC float2 optixBaseBarycentricsToMicroBarycentrics( float2 baseBarycentrics, + float2 microVertexBaseBarycentrics[3] ) +{ + return optix_impl::base2micro( baseBarycentrics, microVertexBaseBarycentrics ); +} -#endif // __optix_optix_micromap_h__ +#endif // OPTIX_OPTIX_MICROMAP_H diff --git a/zenovis/xinxinoptix/include/optix_stack_size.h b/zenovis/xinxinoptix/include/optix_stack_size.h index a3ab70dfc2..eafd4a4295 100644 --- a/zenovis/xinxinoptix/include/optix_stack_size.h +++ b/zenovis/xinxinoptix/include/optix_stack_size.h @@ -30,8 +30,8 @@ /// @author NVIDIA Corporation /// @brief OptiX public API header -#ifndef __optix_optix_stack_size_h__ -#define __optix_optix_stack_size_h__ +#ifndef OPTIX_OPTIX_STACK_SIZE_H +#define OPTIX_OPTIX_STACK_SIZE_H #include "optix.h" @@ -49,13 +49,15 @@ extern "C" { /// Retrieves direct and continuation stack sizes for each program in the program group and accumulates the upper bounds /// in the correponding output variables based on the semantic type of the program. Before the first invocation of this /// function with a given instance of #OptixStackSizes, the members of that instance should be set to 0. -inline OptixResult optixUtilAccumulateStackSizes( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ) +/// If the programs rely on external functions, passing the current pipeline will consider these as well. Otherwise, a null pointer +/// can be passed instead. When external functions are present, a warning will be issued for these cases. 
+inline OptixResult optixUtilAccumulateStackSizes( OptixProgramGroup programGroup, OptixStackSizes* stackSizes, OptixPipeline pipeline ) { if( !stackSizes ) return OPTIX_ERROR_INVALID_VALUE; OptixStackSizes localStackSizes; - OptixResult result = optixProgramGroupGetStackSize( programGroup, &localStackSizes ); + OptixResult result = optixProgramGroupGetStackSize( programGroup, &localStackSizes, pipeline ); if( result != OPTIX_SUCCESS ) return result; @@ -260,6 +262,9 @@ inline OptixResult optixUtilComputeStackSizesCssCCTree( const OptixStackSizes* s /// groups, and compute the maximas of the stack size requirements per array. /// /// See programming guide for an explanation of the formula. +/// +/// If the programs rely on external functions, passing the current pipeline will consider these as well. Otherwise, a null pointer +/// can be passed instead. When external functions are present, a warning will be issued for these cases. inline OptixResult optixUtilComputeStackSizesSimplePathTracer( OptixProgramGroup programGroupRG, OptixProgramGroup programGroupMS1, const OptixProgramGroup* programGroupCH1, @@ -269,7 +274,8 @@ inline OptixResult optixUtilComputeStackSizesSimplePathTracer( OptixProgramGroup unsigned int programGroupCH2Count, unsigned int* directCallableStackSizeFromTraversal, unsigned int* directCallableStackSizeFromState, - unsigned int* continuationStackSize ) + unsigned int* continuationStackSize, + OptixPipeline pipeline ) { if( !programGroupCH1 && ( programGroupCH1Count > 0 ) ) return OPTIX_ERROR_INVALID_VALUE; @@ -279,25 +285,25 @@ inline OptixResult optixUtilComputeStackSizesSimplePathTracer( OptixProgramGroup OptixResult result; OptixStackSizes stackSizesRG = {}; - result = optixProgramGroupGetStackSize( programGroupRG, &stackSizesRG ); + result = optixProgramGroupGetStackSize( programGroupRG, &stackSizesRG, pipeline ); if( result != OPTIX_SUCCESS ) return result; OptixStackSizes stackSizesMS1 = {}; - result = optixProgramGroupGetStackSize( 
programGroupMS1, &stackSizesMS1 ); + result = optixProgramGroupGetStackSize( programGroupMS1, &stackSizesMS1, pipeline ); if( result != OPTIX_SUCCESS ) return result; OptixStackSizes stackSizesCH1 = {}; for( unsigned int i = 0; i < programGroupCH1Count; ++i ) { - result = optixUtilAccumulateStackSizes( programGroupCH1[i], &stackSizesCH1 ); + result = optixUtilAccumulateStackSizes( programGroupCH1[i], &stackSizesCH1, pipeline ); if( result != OPTIX_SUCCESS ) return result; } OptixStackSizes stackSizesMS2 = {}; - result = optixProgramGroupGetStackSize( programGroupMS2, &stackSizesMS2 ); + result = optixProgramGroupGetStackSize( programGroupMS2, &stackSizesMS2, pipeline ); if( result != OPTIX_SUCCESS ) return result; @@ -305,7 +311,7 @@ inline OptixResult optixUtilComputeStackSizesSimplePathTracer( OptixProgramGroup memset( &stackSizesCH2, 0, sizeof( OptixStackSizes ) ); for( unsigned int i = 0; i < programGroupCH2Count; ++i ) { - result = optixUtilAccumulateStackSizes( programGroupCH2[i], &stackSizesCH2 ); + result = optixUtilAccumulateStackSizes( programGroupCH2[i], &stackSizesCH2, pipeline ); if( result != OPTIX_SUCCESS ) return result; } @@ -328,10 +334,10 @@ inline OptixResult optixUtilComputeStackSizesSimplePathTracer( OptixProgramGroup return OPTIX_SUCCESS; } -/*@}*/ // end group optix_utilities +/**@}*/ // end group optix_utilities #ifdef __cplusplus } #endif -#endif // __optix_optix_stack_size_h__ +#endif // OPTIX_OPTIX_STACK_SIZE_H diff --git a/zenovis/xinxinoptix/include/optix_stubs.h b/zenovis/xinxinoptix/include/optix_stubs.h index 469825822e..8ae035b75c 100644 --- a/zenovis/xinxinoptix/include/optix_stubs.h +++ b/zenovis/xinxinoptix/include/optix_stubs.h @@ -30,8 +30,8 @@ /// @author NVIDIA Corporation /// @brief OptiX public API header -#ifndef __optix_optix_stubs_h__ -#define __optix_optix_stubs_h__ +#ifndef OPTIX_OPTIX_STUBS_H +#define OPTIX_OPTIX_STUBS_H #include "optix_function_table.h" @@ -259,7 +259,7 @@ inline OptixResult optixUninitWithHandle( 
void* handle ) } -/*@}*/ // end group optix_utilities +/**@}*/ // end group optix_utilities #ifndef OPTIX_DOXYGEN_SHOULD_SKIP_THIS @@ -378,31 +378,31 @@ inline OptixResult optixDeviceContextGetCacheDatabaseSizes( OptixDeviceContext c return g_optixFunctionTable.optixDeviceContextGetCacheDatabaseSizes( context, lowWaterMark, highWaterMark ); } -inline OptixResult optixModuleCreateFromPTX( OptixDeviceContext context, - const OptixModuleCompileOptions* moduleCompileOptions, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const char* PTX, - size_t PTXsize, - char* logString, - size_t* logStringSize, - OptixModule* module ) +inline OptixResult optixModuleCreate( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* input, + size_t inputSize, + char* logString, + size_t* logStringSize, + OptixModule* module ) { - return g_optixFunctionTable.optixModuleCreateFromPTX( context, moduleCompileOptions, pipelineCompileOptions, PTX, - PTXsize, logString, logStringSize, module ); + return g_optixFunctionTable.optixModuleCreate( context, moduleCompileOptions, pipelineCompileOptions, input, inputSize, + logString, logStringSize, module ); } -inline OptixResult optixModuleCreateFromPTXWithTasks( OptixDeviceContext context, - const OptixModuleCompileOptions* moduleCompileOptions, - const OptixPipelineCompileOptions* pipelineCompileOptions, - const char* PTX, - size_t PTXsize, - char* logString, - size_t* logStringSize, - OptixModule* module, - OptixTask* firstTask ) +inline OptixResult optixModuleCreateWithTasks( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* input, + size_t inputSize, + char* logString, + size_t* logStringSize, + OptixModule* module, + OptixTask* firstTask ) { - return g_optixFunctionTable.optixModuleCreateFromPTXWithTasks( context, 
moduleCompileOptions, pipelineCompileOptions, PTX, - PTXsize, logString, logStringSize, module, firstTask ); + return g_optixFunctionTable.optixModuleCreateWithTasks( context, moduleCompileOptions, pipelineCompileOptions, input, + inputSize, logString, logStringSize, module, firstTask ); } inline OptixResult optixModuleGetCompilationState( OptixModule module, OptixModuleCompileState* state ) @@ -447,9 +447,9 @@ inline OptixResult optixProgramGroupDestroy( OptixProgramGroup programGroup ) return g_optixFunctionTable.optixProgramGroupDestroy( programGroup ); } -inline OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ) +inline OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes, OptixPipeline pipeline ) { - return g_optixFunctionTable.optixProgramGroupGetStackSize( programGroup, stackSizes ); + return g_optixFunctionTable.optixProgramGroupGetStackSize( programGroup, stackSizes, pipeline ); } inline OptixResult optixPipelineCreate( OptixDeviceContext context, @@ -542,6 +542,14 @@ inline OptixResult optixAccelCompact( OptixDeviceContext context, return g_optixFunctionTable.optixAccelCompact( context, stream, inputHandle, outputBuffer, outputBufferSizeInBytes, outputHandle ); } +inline OptixResult optixAccelEmitProperty( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle handle, + const OptixAccelEmitDesc* emittedProperty ) +{ + return g_optixFunctionTable.optixAccelEmitProperty( context, stream, handle, emittedProperty ); +} + inline OptixResult optixConvertPointerToTraversableHandle( OptixDeviceContext onDevice, CUdeviceptr pointer, OptixTraversableType traversableType, @@ -581,6 +589,20 @@ inline OptixResult optixOpacityMicromapArrayRelocate( OptixDeviceContext return g_optixFunctionTable.optixOpacityMicromapArrayRelocate( context, stream, info, targetOpacityMicromapArray, targetOpacityMicromapArraySizeInBytes ); } +inline OptixResult 
optixDisplacementMicromapArrayComputeMemoryUsage( OptixDeviceContext context, + const OptixDisplacementMicromapArrayBuildInput* buildInput, + OptixMicromapBufferSizes* bufferSizes ) +{ + return g_optixFunctionTable.optixDisplacementMicromapArrayComputeMemoryUsage( context, buildInput, bufferSizes ); +} + +inline OptixResult optixDisplacementMicromapArrayBuild( OptixDeviceContext context, + CUstream stream, + const OptixDisplacementMicromapArrayBuildInput* buildInput, + const OptixMicromapBuffers* buffers ) +{ + return g_optixFunctionTable.optixDisplacementMicromapArrayBuild( context, stream, buildInput, buffers ); +} inline OptixResult optixSbtRecordPackHeader( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer ) { @@ -679,4 +701,4 @@ inline OptixResult optixDenoiserComputeAverageColor( OptixDenoiser handle, } #endif -#endif // __optix_optix_stubs_h__ +#endif // OPTIX_OPTIX_STUBS_H diff --git a/zenovis/xinxinoptix/include/optix_types.h b/zenovis/xinxinoptix/include/optix_types.h index cfe0a10e25..354f933216 100644 --- a/zenovis/xinxinoptix/include/optix_types.h +++ b/zenovis/xinxinoptix/include/optix_types.h @@ -1,3 +1,4 @@ + /* * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. * @@ -18,26 +19,2459 @@ * SUCH DAMAGES */ -/** - * @file optix_types.h - * @author NVIDIA Corporation - * @brief OptiX public API header - * - */ +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX types include file -- defines types and enums used by the API. +/// For the math library routines include optix_math.h + +#ifndef OPTIX_OPTIX_TYPES_H +#define OPTIX_OPTIX_TYPES_H + +#if !defined(__CUDACC_RTC__) +#include /* for size_t */ +#endif + + + +/// \defgroup optix_types Types +/// \brief OptiX Types + +/** \addtogroup optix_types +@{ +*/ + +// This typedef should match the one in cuda.h in order to avoid compilation errors. 
+#if defined(_WIN64) || defined(__LP64__) +/// CUDA device pointer +typedef unsigned long long CUdeviceptr; +#else +/// CUDA device pointer +typedef unsigned int CUdeviceptr; +#endif + +/// Opaque type representing a device context +typedef struct OptixDeviceContext_t* OptixDeviceContext; + +/// Opaque type representing a module +typedef struct OptixModule_t* OptixModule; + +/// Opaque type representing a program group +typedef struct OptixProgramGroup_t* OptixProgramGroup; + +/// Opaque type representing a pipeline +typedef struct OptixPipeline_t* OptixPipeline; + +/// Opaque type representing a denoiser instance +typedef struct OptixDenoiser_t* OptixDenoiser; + +/// Opaque type representing a work task +typedef struct OptixTask_t* OptixTask; + +/// Traversable handle +typedef unsigned long long OptixTraversableHandle; + +/// Visibility mask +typedef unsigned int OptixVisibilityMask; + +/// Size of the SBT record headers. +#define OPTIX_SBT_RECORD_HEADER_SIZE ( (size_t)32 ) + +/// Alignment requirement for device pointers in OptixShaderBindingTable. +#define OPTIX_SBT_RECORD_ALIGNMENT 16ull + +/// Alignment requirement for output and temporary buffers for acceleration structures. +#define OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT 128ull + +/// Alignment requirement for OptixBuildInputInstanceArray::instances. +#define OPTIX_INSTANCE_BYTE_ALIGNMENT 16ull + +/// Alignment requirement for OptixBuildInputCustomPrimitiveArray::aabbBuffers +#define OPTIX_AABB_BUFFER_BYTE_ALIGNMENT 8ull + +/// Alignment requirement for OptixBuildInputTriangleArray::preTransform +#define OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT 16ull + +/// Alignment requirement for OptixStaticTransform, OptixMatrixMotionTransform, OptixSRTMotionTransform. +#define OPTIX_TRANSFORM_BYTE_ALIGNMENT 64ull + +/// Alignment requirement for OptixOpacityMicromapArrayBuildInput::perMicromapDescBuffer. +#define OPTIX_OPACITY_MICROMAP_DESC_BUFFER_BYTE_ALIGNMENT 8ull + +/// Maximum number of registers allowed.
Defaults to no explicit limit. +#define OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT 0 + +/// Maximum number of payload types allowed. +#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT 8 + +/// Maximum number of payload values allowed. +#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT 32 + +/// Opacity micromaps encode the states of microtriangles in either 1 bit (2-state) or 2 bits (4-state) using +/// the following values. +#define OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT ( 0 ) +#define OPTIX_OPACITY_MICROMAP_STATE_OPAQUE ( 1 ) +#define OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT ( 2 ) +#define OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE ( 3 ) + +/// Predefined index to indicate that a triangle in the BVH build doesn't have an associated opacity micromap, +/// and that it should revert to one of the four possible states for the full triangle. +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT ( -1 ) +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE ( -2 ) +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT ( -3 ) +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE ( -4 ) + +/// Alignment requirement for opacity micromap array buffers +#define OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT 128ull + +/// Maximum subdivision level for opacity micromaps +#define OPTIX_OPACITY_MICROMAP_MAX_SUBDIVISION_LEVEL 12 + +/// Maximum subdivision level for displacement micromaps +#define OPTIX_DISPLACEMENT_MICROMAP_MAX_SUBDIVISION_LEVEL 5 + +/// Alignment requirement for displacement micromap descriptor buffers +#define OPTIX_DISPLACEMENT_MICROMAP_DESC_BUFFER_BYTE_ALIGNMENT 8ull + +/// Alignment requirement for displacement micromap array buffers +#define OPTIX_DISPLACEMENT_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT 128ull + +/// Result codes returned from API functions +/// +/// All host side API functions return OptixResult with the exception of optixGetErrorName +/// and optixGetErrorString. 
When successful OPTIX_SUCCESS is returned. All return codes +/// except for OPTIX_SUCCESS should be assumed to be errors as opposed to a warning. +/// +/// \see #optixGetErrorName(), #optixGetErrorString() +typedef enum OptixResult +{ + OPTIX_SUCCESS = 0, + OPTIX_ERROR_INVALID_VALUE = 7001, + OPTIX_ERROR_HOST_OUT_OF_MEMORY = 7002, + OPTIX_ERROR_INVALID_OPERATION = 7003, + OPTIX_ERROR_FILE_IO_ERROR = 7004, + OPTIX_ERROR_INVALID_FILE_FORMAT = 7005, + OPTIX_ERROR_DISK_CACHE_INVALID_PATH = 7010, + OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR = 7011, + OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR = 7012, + OPTIX_ERROR_DISK_CACHE_INVALID_DATA = 7013, + OPTIX_ERROR_LAUNCH_FAILURE = 7050, + OPTIX_ERROR_INVALID_DEVICE_CONTEXT = 7051, + OPTIX_ERROR_CUDA_NOT_INITIALIZED = 7052, + OPTIX_ERROR_VALIDATION_FAILURE = 7053, + OPTIX_ERROR_INVALID_INPUT = 7200, + OPTIX_ERROR_INVALID_LAUNCH_PARAMETER = 7201, + OPTIX_ERROR_INVALID_PAYLOAD_ACCESS = 7202, + OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS = 7203, + OPTIX_ERROR_INVALID_FUNCTION_USE = 7204, + OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS = 7205, + OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY = 7250, + OPTIX_ERROR_PIPELINE_LINK_ERROR = 7251, + OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE = 7270, + OPTIX_ERROR_INTERNAL_COMPILER_ERROR = 7299, + OPTIX_ERROR_DENOISER_MODEL_NOT_SET = 7300, + OPTIX_ERROR_DENOISER_NOT_INITIALIZED = 7301, + OPTIX_ERROR_NOT_COMPATIBLE = 7400, + OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH = 7500, + OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED = 7501, + OPTIX_ERROR_PAYLOAD_TYPE_ID_INVALID = 7502, + OPTIX_ERROR_NOT_SUPPORTED = 7800, + OPTIX_ERROR_UNSUPPORTED_ABI_VERSION = 7801, + OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH = 7802, + OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS = 7803, + OPTIX_ERROR_LIBRARY_NOT_FOUND = 7804, + OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND = 7805, + OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE = 7806, + OPTIX_ERROR_DEVICE_OUT_OF_MEMORY = 7807, + OPTIX_ERROR_CUDA_ERROR = 7900, + OPTIX_ERROR_INTERNAL_ERROR = 7990, + OPTIX_ERROR_UNKNOWN = 7999, +} 
OptixResult; + +/// Parameters used for #optixDeviceContextGetProperty() +/// +/// \see #optixDeviceContextGetProperty() +typedef enum OptixDeviceProperty +{ + /// Maximum value for OptixPipelineLinkOptions::maxTraceDepth. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRACE_DEPTH = 0x2001, + + /// Maximum value to pass into optixPipelineSetStackSize for parameter + /// maxTraversableGraphDepth. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRAVERSABLE_GRAPH_DEPTH = 0x2002, + + /// The maximum number of primitives (over all build inputs) as input to a single + /// Geometry Acceleration Structure (GAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_PRIMITIVES_PER_GAS = 0x2003, + + /// The maximum number of instances (over all build inputs) as input to a single + /// Instance Acceleration Structure (IAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCES_PER_IAS = 0x2004, + + /// The RT core version supported by the device (0 for no support, 10 for version + /// 1.0). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_RTCORE_VERSION = 0x2005, + + /// The maximum value for #OptixInstance::instanceId. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID = 0x2006, + + /// The number of bits available for the #OptixInstance::visibilityMask. + /// Higher bits must be set to zero. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK = 0x2007, + + /// The maximum number of SBT records that can be used by a single Geometry + /// Acceleration Structure (GAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_RECORDS_PER_GAS = 0x2008, + + /// The maximum summed value of #OptixInstance::sbtOffset. + /// Also the maximum summed value of sbt offsets of all ancestor + /// instances of a GAS in a traversable graph. + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_OFFSET = 0x2009, +} OptixDeviceProperty; + +/// Type of the callback function used for log messages.
+/// +/// \param[in] level The log level indicates the severity of the message. See below for +/// possible values. +/// \param[in] tag A terse message category description (e.g., 'SCENE STAT'). +/// \param[in] message Null terminated log message (without newline at the end). +/// \param[in] cbdata Callback data that was provided with the callback pointer. +/// +/// It is the user's responsibility to ensure thread safety within this function. +/// +/// The following log levels are defined. +/// +/// 0 disable Setting the callback level will disable all messages. The callback +/// function will not be called in this case. +/// 1 fatal A non-recoverable error. The context and/or OptiX itself might no longer +/// be in a usable state. +/// 2 error A recoverable error, e.g., when passing invalid call parameters. +/// 3 warning Hints that OptiX might not behave exactly as requested by the user or +/// may perform slower than expected. +/// 4 print Status or progress messages. +/// +/// Higher levels might occur. +/// +/// \see #optixDeviceContextSetLogCallback(), #OptixDeviceContextOptions +typedef void ( *OptixLogCallback )( unsigned int level, const char* tag, const char* message, void* cbdata ); + +/// Validation mode settings. +/// +/// When enabled, certain device code utilities will be enabled to provide as good debug and +/// error checking facilities as possible.
+/// +/// +/// \see #optixDeviceContextCreate() +typedef enum OptixDeviceContextValidationMode +{ + OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_OFF = 0, + OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL = 0xFFFFFFFF +} OptixDeviceContextValidationMode; + +/// Parameters used for #optixDeviceContextCreate() +/// +/// \see #optixDeviceContextCreate() +typedef struct OptixDeviceContextOptions +{ + /// Function pointer used when OptiX wishes to generate messages + OptixLogCallback logCallbackFunction; + /// Pointer stored and passed to logCallbackFunction when a message is generated + void* logCallbackData; + /// Maximum callback level to generate message for (see #OptixLogCallback) + int logCallbackLevel; + /// Validation mode of context. + OptixDeviceContextValidationMode validationMode; +} OptixDeviceContextOptions; + +/// Flags used by #OptixBuildInputTriangleArray::flags +/// and #OptixBuildInput::flag +/// and #OptixBuildInputCustomPrimitiveArray::flags +typedef enum OptixGeometryFlags +{ + /// No flags set + OPTIX_GEOMETRY_FLAG_NONE = 0, + + /// Disables the invocation of the anyhit program. + /// Can be overridden by OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT and OPTIX_RAY_FLAG_ENFORCE_ANYHIT. + OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT = 1u << 0, + + /// If set, an intersection with the primitive will trigger one and only one + /// invocation of the anyhit program. Otherwise, the anyhit program may be invoked + /// more than once. + OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL = 1u << 1, + + /// Prevent triangles from getting culled due to their orientation. + /// Effectively ignores ray flags + /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 2, +} OptixGeometryFlags; + +/// Legacy type: A subset of the hit kinds for built-in primitive intersections. +/// It is preferred to use optixGetPrimitiveType(), together with +/// optixIsFrontFaceHit() or optixIsBackFaceHit(). 
+/// +/// \see #optixGetHitKind() +typedef enum OptixHitKind +{ + /// Ray hit the triangle on the front face + OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE = 0xFE, + /// Ray hit the triangle on the back face + OPTIX_HIT_KIND_TRIANGLE_BACK_FACE = 0xFF +} OptixHitKind; + +/// Format of indices used in #OptixBuildInputTriangleArray::indexFormat. +typedef enum OptixIndicesFormat +{ + /// No indices, this format must only be used in combination with triangle soups, i.e., numIndexTriplets must be zero + OPTIX_INDICES_FORMAT_NONE = 0, + /// Three shorts + OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 = 0x2102, + /// Three ints + OPTIX_INDICES_FORMAT_UNSIGNED_INT3 = 0x2103 +} OptixIndicesFormat; + +/// Format of vertices used in #OptixBuildInputTriangleArray::vertexFormat. +typedef enum OptixVertexFormat +{ + OPTIX_VERTEX_FORMAT_NONE = 0, ///< No vertices + OPTIX_VERTEX_FORMAT_FLOAT3 = 0x2121, ///< Vertices are represented by three floats + OPTIX_VERTEX_FORMAT_FLOAT2 = 0x2122, ///< Vertices are represented by two floats + OPTIX_VERTEX_FORMAT_HALF3 = 0x2123, ///< Vertices are represented by three halfs + OPTIX_VERTEX_FORMAT_HALF2 = 0x2124, ///< Vertices are represented by two halfs + OPTIX_VERTEX_FORMAT_SNORM16_3 = 0x2125, + OPTIX_VERTEX_FORMAT_SNORM16_2 = 0x2126 +} OptixVertexFormat; + +/// Format of transform used in #OptixBuildInputTriangleArray::transformFormat. 
+typedef enum OptixTransformFormat +{ + OPTIX_TRANSFORM_FORMAT_NONE = 0, ///< no transform, default for zero initialization + OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 = 0x21E1, ///< 3x4 row major affine matrix +} OptixTransformFormat; + +typedef enum OptixDisplacementMicromapBiasAndScaleFormat +{ + OPTIX_DISPLACEMENT_MICROMAP_BIAS_AND_SCALE_FORMAT_NONE = 0, + OPTIX_DISPLACEMENT_MICROMAP_BIAS_AND_SCALE_FORMAT_FLOAT2 = 0x2241, + OPTIX_DISPLACEMENT_MICROMAP_BIAS_AND_SCALE_FORMAT_HALF2 = 0x2242, +} OptixDisplacementMicromapBiasAndScaleFormat; + +typedef enum OptixDisplacementMicromapDirectionFormat +{ + OPTIX_DISPLACEMENT_MICROMAP_DIRECTION_FORMAT_NONE = 0, + OPTIX_DISPLACEMENT_MICROMAP_DIRECTION_FORMAT_FLOAT3 = 0x2261, + OPTIX_DISPLACEMENT_MICROMAP_DIRECTION_FORMAT_HALF3 = 0x2262, +} OptixDisplacementMicromapDirectionFormat; + +/// Specifies whether to use a 2- or 4-state opacity micromap format. +typedef enum OptixOpacityMicromapFormat +{ + /// invalid format + OPTIX_OPACITY_MICROMAP_FORMAT_NONE = 0, + /// 0: Transparent, 1: Opaque + OPTIX_OPACITY_MICROMAP_FORMAT_2_STATE = 1, + /// 0: Transparent, 1: Opaque, 2: Unknown-Transparent, 3: Unknown-Opaque + OPTIX_OPACITY_MICROMAP_FORMAT_4_STATE = 2, +} OptixOpacityMicromapFormat; + +/// indexing mode of triangles to opacity micromaps in an array, used in #OptixBuildInputOpacityMicromap. +typedef enum OptixOpacityMicromapArrayIndexingMode +{ + /// No opacity micromap is used + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE = 0, + /// An implicit linear mapping of triangles to opacity micromaps in the + /// opacity micromap array is used. triangle[i] will use opacityMicromapArray[i]. + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR = 1, + /// OptixBuildInputOpacityMicromap::indexBuffer provides a per triangle array of predefined indices + /// and/or indices into OptixBuildInputOpacityMicromap::opacityMicromapArray. + /// See OptixBuildInputOpacityMicromap::indexBuffer for more details. 
+ OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED = 2, +} OptixOpacityMicromapArrayIndexingMode; + +/// Opacity micromap usage count for acceleration structure builds. +/// Specifies how many opacity micromaps of a specific type are referenced by triangles when building the AS. +/// Note that while this is similar to OptixOpacityMicromapHistogramEntry, the usage count specifies how many opacity micromaps +/// of a specific type are referenced by triangles in the AS. +typedef struct OptixOpacityMicromapUsageCount +{ + /// Number of opacity micromaps with this format and subdivision level referenced by triangles in the corresponding + /// triangle build input at AS build time. + unsigned int count; + /// Number of micro-triangles is 4^level. Valid levels are [0, 12] + unsigned int subdivisionLevel; + /// opacity micromap format. + OptixOpacityMicromapFormat format; +} OptixOpacityMicromapUsageCount; + +typedef struct OptixBuildInputOpacityMicromap +{ + /// Indexing mode of triangle to opacity micromap array mapping. + OptixOpacityMicromapArrayIndexingMode indexingMode; + + /// Device pointer to a opacity micromap array used by this build input array. + /// This buffer is required when #OptixBuildInputOpacityMicromap::indexingMode is + /// OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR or OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. + /// Must be zero if #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE. + CUdeviceptr opacityMicromapArray; + + /// int16 or int32 buffer specifying which opacity micromap index to use for each triangle. 
+ /// Instead of an actual index, one of the predefined indices + /// OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_(FULLY_TRANSPARENT | FULLY_OPAQUE | FULLY_UNKNOWN_TRANSPARENT | FULLY_UNKNOWN_OPAQUE) + /// can be used to indicate that there is no opacity micromap for this particular triangle + /// but the triangle is in a uniform state and the selected behavior is applied + /// to the entire triangle. + /// This buffer is required when #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. + /// Must be zero if #OptixBuildInputOpacityMicromap::indexingMode is + /// OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR or OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE. + CUdeviceptr indexBuffer; + + /// 0, 2 or 4 (unused, 16 or 32 bit) + /// Must be non-zero when #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. + unsigned int indexSizeInBytes; + + /// Opacity micromap index buffer stride. If set to zero, indices are assumed to be tightly + /// packed and stride is inferred from #OptixBuildInputOpacityMicromap::indexSizeInBytes. + unsigned int indexStrideInBytes; + + /// Constant offset to non-negative opacity micromap indices + unsigned int indexOffset; + + /// Number of OptixOpacityMicromapUsageCount. + unsigned int numMicromapUsageCounts; + /// List of number of usages of opacity micromaps of format and subdivision combinations. + /// Counts with equal format and subdivision combination (duplicates) are added together. + const OptixOpacityMicromapUsageCount* micromapUsageCounts; +} OptixBuildInputOpacityMicromap; + +typedef struct OptixRelocateInputOpacityMicromap +{ + /// Device pointer to a relocated opacity micromap array used by the source build input array. + /// May be zero when no micromaps were used in the source accel, or the referenced opacity + /// micromaps don't require relocation (for example relocation of a GAS on the source device). 
+ CUdeviceptr opacityMicromapArray; +} OptixRelocateInputOpacityMicromap; + + +/// DMM input data format. +typedef enum OptixDisplacementMicromapFormat +{ + OPTIX_DISPLACEMENT_MICROMAP_FORMAT_NONE = 0, + OPTIX_DISPLACEMENT_MICROMAP_FORMAT_64_MICRO_TRIS_64_BYTES = 1, + OPTIX_DISPLACEMENT_MICROMAP_FORMAT_256_MICRO_TRIS_128_BYTES = 2, + OPTIX_DISPLACEMENT_MICROMAP_FORMAT_1024_MICRO_TRIS_128_BYTES = 3, +} OptixDisplacementMicromapFormat; + +/// Flags defining behavior of DMMs in a DMM array. +typedef enum OptixDisplacementMicromapFlags +{ + OPTIX_DISPLACEMENT_MICROMAP_FLAG_NONE = 0, + + /// This flag is mutually exclusive with OPTIX_DISPLACEMENT_MICROMAP_FLAG_PREFER_FAST_BUILD. + OPTIX_DISPLACEMENT_MICROMAP_FLAG_PREFER_FAST_TRACE = 1 << 0, + + /// This flag is mutually exclusive with OPTIX_DISPLACEMENT_MICROMAP_FLAG_PREFER_FAST_TRACE. + OPTIX_DISPLACEMENT_MICROMAP_FLAG_PREFER_FAST_BUILD = 1 << 1, + +} OptixDisplacementMicromapFlags; + +typedef enum OptixDisplacementMicromapTriangleFlags +{ + OPTIX_DISPLACEMENT_MICROMAP_TRIANGLE_FLAG_NONE = 0, + /// The triangle edge v0..v1 is decimated: after subdivision the number of micro triangles on that edge is halved + /// such that a neighboring triangle can have a lower subdivision level without introducing cracks. + OPTIX_DISPLACEMENT_MICROMAP_TRIANGLE_FLAG_DECIMATE_EDGE_01 = 1 << 0, + /// The triangle edge v1..v2 is decimated. + OPTIX_DISPLACEMENT_MICROMAP_TRIANGLE_FLAG_DECIMATE_EDGE_12 = 1 << 1, + /// The triangle edge v2..v0 is decimated. + OPTIX_DISPLACEMENT_MICROMAP_TRIANGLE_FLAG_DECIMATE_EDGE_20 = 1 << 2, +} OptixDisplacementMicromapTriangleFlags; + +typedef struct OptixDisplacementMicromapDesc +{ + /// Block is located at displacementValuesBuffer + byteOffset + unsigned int byteOffset; + /// Number of micro-triangles is 4^level. 
Valid levels are [0, 5] + unsigned short subdivisionLevel; + /// Format (OptixDisplacementMicromapFormat) + unsigned short format; +} OptixDisplacementMicromapDesc; + +/// Displacement micromap histogram entry. +/// Specifies how many displacement micromaps of a specific type are input to the displacement micromap array build. +/// Note that while this is similar to OptixDisplacementMicromapUsageCount, the histogram entry specifies how many displacement micromaps +/// of a specific type are combined into a displacement micromap array. +typedef struct OptixDisplacementMicromapHistogramEntry +{ + /// Number of displacement micromaps with the format and subdivision level that are input to the displacement micromap array build. + unsigned int count; + /// Number of micro-triangles is 4^level. Valid levels are [0, 5] + unsigned int subdivisionLevel; + /// Displacement micromap format. + OptixDisplacementMicromapFormat format; +} OptixDisplacementMicromapHistogramEntry; + +/// Inputs to displacement micromaps array construction. +typedef struct OptixDisplacementMicromapArrayBuildInput +{ + /// Flags that apply to all displacement micromaps in array. + OptixDisplacementMicromapFlags flags; + /// 128 byte aligned pointer for displacement micromap raw input data. + CUdeviceptr displacementValuesBuffer; + /// Descriptors for interpreting raw input data, one OptixDisplacementMicromapDesc entry required per displacement micromap. + /// This device pointer must be a multiple of OPTIX_DISPLACEMENT_MICROMAP_DESC_BUFFER_BYTE_ALIGNMENT. + CUdeviceptr perDisplacementMicromapDescBuffer; + /// Stride between OptixDisplacementMicromapDesc in perDisplacementMicromapDescBuffer + /// If set to zero, the displacement micromap descriptors are assumed to be tightly packed and the stride is assumed to be sizeof( OptixDisplacementMicromapDesc ). + /// This stride must be a multiple of OPTIX_DISPLACEMENT_MICROMAP_DESC_BUFFER_BYTE_ALIGNMENT. 
+ unsigned int perDisplacementMicromapDescStrideInBytes; + /// Number of OptixDisplacementMicromapHistogramEntry entries. + unsigned int numDisplacementMicromapHistogramEntries; + /// Histogram over DMMs for input format and subdivision combinations. + /// Counts of histogram bins with equal format and subdivision combinations are added together. + const OptixDisplacementMicromapHistogramEntry* displacementMicromapHistogramEntries; +} OptixDisplacementMicromapArrayBuildInput; + +/// Displacement micromap usage count for acceleration structure builds. +/// Specifies how many displacement micromaps of a specific type are referenced by triangles when building the AS. +/// Note that while this is similar to OptixDisplacementMicromapHistogramEntry, the usage count specifies how many displacement micromaps +/// of a specific type are referenced by triangles in the AS. +typedef struct OptixDisplacementMicromapUsageCount +{ + /// Number of displacement micromaps with this format and subdivision level referenced by triangles in the corresponding + /// triangle build input at AS build time. + unsigned int count; + /// Number of micro-triangles is 4^level. Valid levels are [0, 5] + unsigned int subdivisionLevel; + /// Displacement micromaps format. + OptixDisplacementMicromapFormat format; +} OptixDisplacementMicromapUsageCount; + +/// indexing mode of triangles to displacement micromaps in an array, used in #OptixBuildInputDisplacementMicromap. +typedef enum OptixDisplacementMicromapArrayIndexingMode +{ + /// No displacement micromap is used + OPTIX_DISPLACEMENT_MICROMAP_ARRAY_INDEXING_MODE_NONE = 0, + /// An implicit linear mapping of triangles to displacement micromaps in the + /// displacement micromap array is used. triangle[i] will use displacementMicromapArray[i]. 
+ OPTIX_DISPLACEMENT_MICROMAP_ARRAY_INDEXING_MODE_LINEAR = 1, + /// OptixBuildInputDisplacementMicromap::displacementMicromapIndexBuffer provides a per triangle array of + /// indices into OptixBuildInputDisplacementMicromap::displacementMicromapArray. + /// See OptixBuildInputDisplacementMicromap::displacementMicromapIndexBuffer for more details. + OPTIX_DISPLACEMENT_MICROMAP_ARRAY_INDEXING_MODE_INDEXED = 2, +} OptixDisplacementMicromapArrayIndexingMode; + +/// Optional displacement part of a triangle array input +typedef struct OptixBuildInputDisplacementMicromap +{ + /// Indexing mode of triangle to displacement micromap array mapping. + OptixDisplacementMicromapArrayIndexingMode indexingMode; + + /// Address to a displacement micromap array used by this build input array. Set to NULL to disable DMs for this input. + CUdeviceptr displacementMicromapArray; + /// int16 or int32 buffer specifying which displacement micromap index to use for each triangle. Only valid if displacementMicromapArray != NULL. + CUdeviceptr displacementMicromapIndexBuffer; + /// Per triangle-vertex displacement directions. + CUdeviceptr vertexDirectionsBuffer; + /// Optional per-vertex bias (offset) along displacement direction and displacement direction scale. + CUdeviceptr vertexBiasAndScaleBuffer; + /// Optional per-triangle flags, uint8_t per triangle, possible values defined in enum OptixDisplacementMicromapTriangleFlags + CUdeviceptr triangleFlagsBuffer; + + /// Constant offset to displacement micromap indices as specified by the displacement micromap index buffer + unsigned int displacementMicromapIndexOffset; + /// Displacement micromap index buffer stride. If set to zero, indices are assumed to be tightly + /// packed and stride is inferred from #OptixBuildInputDisplacementMicromap::displacementMicromapIndexSizeInBytes. 
+ unsigned int displacementMicromapIndexStrideInBytes; + /// 2 or 4 (16 or 32 bit) + unsigned int displacementMicromapIndexSizeInBytes; + + /// Format of displacement vectors + OptixDisplacementMicromapDirectionFormat vertexDirectionFormat; + /// Stride between displacement vectors + unsigned int vertexDirectionStrideInBytes; + + /// Format of vertex bias and direction scale + OptixDisplacementMicromapBiasAndScaleFormat vertexBiasAndScaleFormat; + /// Stride in bytes for vertex bias and direction scale entries + unsigned int vertexBiasAndScaleStrideInBytes; + + /// Stride in bytes for triangleFlags + unsigned int triangleFlagsStrideInBytes; + + /// Number of OptixDisplacementMicromapUsageCount entries. + unsigned int numDisplacementMicromapUsageCounts; + /// List of number of usages of displacement micromaps of format and subdivision combinations. + /// Counts with equal format and subdivision combination (duplicates) are added together. + const OptixDisplacementMicromapUsageCount* displacementMicromapUsageCounts; + +} OptixBuildInputDisplacementMicromap; + + +/// Triangle inputs +/// +/// \see #OptixBuildInput::triangleArray +typedef struct OptixBuildInputTriangleArray +{ + /// Points to host array of device pointers, one per motion step. Host array size must match the number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 0 or 1). Each per motion key device pointer must point to an array of vertices of the + /// triangles in the format as described by vertexFormat. The minimum alignment must match the natural + /// alignment of the type as specified in the vertexFormat, i.e., for OPTIX_VERTEX_FORMAT_FLOATX 4-byte, + /// for all others a 2-byte alignment. However, an 16-byte stride (and buffer alignment) is recommended for + /// vertices of format OPTIX_VERTEX_FORMAT_FLOAT3 for GAS build performance. 
+ const CUdeviceptr* vertexBuffers; + + /// Number of vertices in each buffer in OptixBuildInputTriangleArray::vertexBuffers. + unsigned int numVertices; + + /// \see #OptixVertexFormat + OptixVertexFormat vertexFormat; + + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is inferred from vertexFormat. + unsigned int vertexStrideInBytes; + + /// Optional pointer to array of 16 or 32-bit int triplets, one triplet per triangle. + /// The minimum alignment must match the natural alignment of the type as specified in the indexFormat, i.e., + /// for OPTIX_INDICES_FORMAT_UNSIGNED_INT3 4-byte and for OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 a 2-byte alignment. + CUdeviceptr indexBuffer; + + /// Size of array in OptixBuildInputTriangleArray::indexBuffer. For build, needs to be zero if indexBuffer is \c nullptr. + unsigned int numIndexTriplets; + + /// \see #OptixIndicesFormat + OptixIndicesFormat indexFormat; + + /// Stride between triplets of indices. If set to zero, indices are assumed to be tightly + /// packed and stride is inferred from indexFormat. + unsigned int indexStrideInBytes; + + /// Optional pointer to array of floats + /// representing a 3x4 row major affine + /// transformation matrix. This pointer must be a multiple of OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT + CUdeviceptr preTransform; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). 
+ unsigned int sbtIndexOffsetSizeInBytes; + + /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of triangles must not overflow 32bits. + unsigned int primitiveIndexOffset; + + /// \see #OptixTransformFormat + OptixTransformFormat transformFormat; + + /// Optional opacity micromap inputs. + OptixBuildInputOpacityMicromap opacityMicromap; + /// Optional displacement micromap inputs. + OptixBuildInputDisplacementMicromap displacementMicromap; + +} OptixBuildInputTriangleArray; + +/// Triangle inputs +/// +/// \see #OptixRelocateInput::triangleArray +typedef struct OptixRelocateInputTriangleArray +{ + /// Number of sbt records available to the sbt index offset override. + /// Must match #OptixBuildInputTriangleArray::numSbtRecords of the source build input. + unsigned int numSbtRecords; + + /// Opacity micromap inputs. + OptixRelocateInputOpacityMicromap opacityMicromap; +} OptixRelocateInputTriangleArray; + +/// Builtin primitive types +/// +typedef enum OptixPrimitiveType +{ + /// Custom primitive. + OPTIX_PRIMITIVE_TYPE_CUSTOM = 0x2500, + /// B-spline curve of degree 2 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE = 0x2501, + /// B-spline curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE = 0x2502, + /// Piecewise linear curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR = 0x2503, + /// CatmullRom curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM = 0x2504, + /// B-spline curve of degree 2 with oriented, flat cross-section. + OPTIX_PRIMITIVE_TYPE_FLAT_QUADRATIC_BSPLINE = 0x2505, + /// Sphere. 
+ OPTIX_PRIMITIVE_TYPE_SPHERE = 0x2506, + /// Bezier curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BEZIER = 0x2507, + /// Triangle. + OPTIX_PRIMITIVE_TYPE_TRIANGLE = 0x2531, + /// Triangle with an applied displacement micromap. + OPTIX_PRIMITIVE_TYPE_DISPLACED_MICROMESH_TRIANGLE = 0x2532, +} OptixPrimitiveType; + +/// Builtin flags may be bitwise combined. +/// +/// \see #OptixPipelineCompileOptions::usesPrimitiveTypeFlags +typedef enum OptixPrimitiveTypeFlags +{ + /// Custom primitive. + OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM = 1 << 0, + /// B-spline curve of degree 2 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE = 1 << 1, + /// B-spline curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE = 1 << 2, + /// Piecewise linear curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR = 1 << 3, + /// CatmullRom curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM = 1 << 4, + /// B-spline curve of degree 2 with oriented, flat cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_FLAT_QUADRATIC_BSPLINE = 1 << 5, + /// Sphere. + OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE = 1 << 6, + /// Bezier curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BEZIER = 1 << 7, + /// Triangle. + OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE = 1 << 31, + /// Triangle with an applied displacement micromap. + OPTIX_PRIMITIVE_TYPE_FLAGS_DISPLACED_MICROMESH_TRIANGLE = 1 << 30, +} OptixPrimitiveTypeFlags; + +/// Curve end cap types, for non-linear curves +/// +typedef enum OptixCurveEndcapFlags +{ + /// Default end caps. Round end caps for linear, no end caps for quadratic/cubic. + OPTIX_CURVE_ENDCAP_DEFAULT = 0, + /// Flat end caps at both ends of quadratic/cubic curve segments. Not valid for linear. 
+ OPTIX_CURVE_ENDCAP_ON = 1 << 0, +} OptixCurveEndcapFlags; + +/// Curve inputs +/// +/// A curve is a swept surface defined by a 3D spline curve and a varying width (radius). A curve (or "strand") of +/// degree d (3=cubic, 2=quadratic, 1=linear) is represented by N > d vertices and N width values, and comprises N - d segments. +/// Each segment is defined by d+1 consecutive vertices. Each curve may have a different number of vertices. +/// +/// OptiX describes the curve array as a list of curve segments. The primitive id is the segment number. +/// It is the user's responsibility to maintain a mapping between curves and curve segments. +/// Each index buffer entry i = indexBuffer[primid] specifies the start of a curve segment, +/// represented by d+1 consecutive vertices in the vertex buffer, +/// and d+1 consecutive widths in the width buffer. Width is interpolated the same +/// way vertices are interpolated, that is, using the curve basis. +/// +/// Each curves build input has only one SBT record. +/// To create curves with different materials in the same BVH, use multiple build inputs. +/// +/// \see #OptixBuildInput::curveArray +typedef struct OptixBuildInputCurveArray +{ + /// Curve degree and basis + /// \see #OptixPrimitiveType + OptixPrimitiveType curveType; + /// Number of primitives. Each primitive is a polynomial curve segment. + unsigned int numPrimitives; + + /// Pointer to host array of device pointers, one per motion step. Host array size must match number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 1). Each per-motion-key device pointer must point to an array of floats (the vertices of the + /// curves). + const CUdeviceptr* vertexBuffers; + /// Number of vertices in each buffer in vertexBuffers. + unsigned int numVertices; + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is sizeof( float3 ). 
+ unsigned int vertexStrideInBytes; + + /// Parallel to vertexBuffers: a device pointer per motion step, each with numVertices float values, + /// specifying the curve width (radius) corresponding to each vertex. + const CUdeviceptr* widthBuffers; + /// Stride between widths. If set to zero, widths are assumed to be tightly + /// packed and stride is sizeof( float ). + unsigned int widthStrideInBytes; + + /// Reserved for future use. + const CUdeviceptr* normalBuffers; + /// Reserved for future use. + unsigned int normalStrideInBytes; + + /// Device pointer to array of unsigned ints, one per curve segment. + /// This buffer is required (unlike for OptixBuildInputTriangleArray). + /// Each index is the start of degree+1 consecutive vertices in vertexBuffers, + /// and corresponding widths in widthBuffers and normals in normalBuffers. + /// These define a single segment. Size of array is numPrimitives. + CUdeviceptr indexBuffer; + /// Stride between indices. If set to zero, indices are assumed to be tightly + /// packed and stride is sizeof( unsigned int ). + unsigned int indexStrideInBytes; + + /// Combination of OptixGeometryFlags describing the + /// primitive behavior. + unsigned int flag; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. + unsigned int primitiveIndexOffset; + + /// End cap flags, see OptixCurveEndcapFlags + unsigned int endcapFlags; +} OptixBuildInputCurveArray; + +/// Sphere inputs +/// +/// A sphere is defined by a center point and a radius. +/// Each center point is represented by a vertex in the vertex buffer. +/// There is either a single radius for all spheres, or the radii are represented by entries in the radius buffer. +/// +/// The vertex buffers and radius buffers point to a host array of device pointers, one per motion step. 
+/// Host array size must match the number of motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set +/// to 0 or 1). Each per motion key device pointer must point to an array of vertices corresponding to the center points of the spheres, or +/// an array of 1 or N radii. Format OPTIX_VERTEX_FORMAT_FLOAT3 is used for vertices, OPTIX_VERTEX_FORMAT_FLOAT for radii. +/// +/// \see #OptixBuildInput::sphereArray +typedef struct OptixBuildInputSphereArray +{ + /// Pointer to host array of device pointers, one per motion step. Host array size must match number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 1). Each per-motion-key device pointer must point to an array of floats (the center points of + /// the spheres). + const CUdeviceptr* vertexBuffers; + + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is sizeof( float3 ). + unsigned int vertexStrideInBytes; + /// Number of vertices in each buffer in vertexBuffers. + unsigned int numVertices; + + /// Parallel to vertexBuffers: a device pointer per motion step, each with numRadii float values, + /// specifying the sphere radius corresponding to each vertex. + const CUdeviceptr* radiusBuffers; + /// Stride between radii. If set to zero, radii are assumed to be tightly + /// packed and stride is sizeof( float ). + unsigned int radiusStrideInBytes; + /// Boolean value indicating whether a single radius per radius buffer is used, + /// or the number of radii in radiusBuffers equals numVertices. + int singleRadius; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. 
+ unsigned int numSbtRecords; + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + /// Stride between the sbt index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. + unsigned int primitiveIndexOffset; +} OptixBuildInputSphereArray; + +/// AABB inputs +typedef struct OptixAabb +{ + float minX; ///< Lower extent in X direction. + float minY; ///< Lower extent in Y direction. + float minZ; ///< Lower extent in Z direction. + float maxX; ///< Upper extent in X direction. + float maxY; ///< Upper extent in Y direction. + float maxZ; ///< Upper extent in Z direction. +} OptixAabb; + +/// Custom primitive inputs +/// +/// \see #OptixBuildInput::customPrimitiveArray +typedef struct OptixBuildInputCustomPrimitiveArray +{ + /// Points to host array of device pointers to AABBs (type OptixAabb), one per motion step. + /// Host array size must match number of motion keys as set in OptixMotionOptions (or an array of size 1 + /// if OptixMotionOptions::numKeys is set to 1). + /// Each device pointer must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. + const CUdeviceptr* aabbBuffers; + + /// Number of primitives in each buffer (i.e., per motion step) in + /// #OptixBuildInputCustomPrimitiveArray::aabbBuffers. + unsigned int numPrimitives; + + /// Stride between AABBs (per motion key). 
If set to zero, the aabbs are assumed to be tightly + /// packed and the stride is assumed to be sizeof( OptixAabb ). + /// If non-zero, the value must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. + unsigned int strideInBytes; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + + /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. + unsigned int primitiveIndexOffset; +} OptixBuildInputCustomPrimitiveArray; + +/// Instance and instance pointer inputs +/// +/// \see #OptixBuildInput::instanceArray +typedef struct OptixBuildInputInstanceArray +{ + /// If OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS instances and + /// aabbs should be interpreted as arrays of pointers instead of arrays of structs. + /// + /// This pointer must be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if + /// OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCES. The array elements must + /// be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if OptixBuildInput::type is + /// OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS. 
+ CUdeviceptr instances; + + /// Number of elements in #OptixBuildInputInstanceArray::instances. + unsigned int numInstances; + + /// Only valid for OPTIX_BUILD_INPUT_TYPE_INSTANCES + /// Defines the stride between instances. A stride of 0 indicates a tight packing, i.e., + /// stride = sizeof( OptixInstance ) + unsigned int instanceStride; +} OptixBuildInputInstanceArray; + +/// Instance and instance pointer inputs +/// +/// \see #OptixRelocateInput::instanceArray +typedef struct OptixRelocateInputInstanceArray +{ + /// Number of elements in #OptixRelocateInputInstanceArray::traversableHandles. + /// Must match #OptixBuildInputInstanceArray::numInstances of the source build input. + unsigned int numInstances; + + /// These are the traversable handles of the instances (See OptixInstance::traversableHandle) + /// These can be used when also relocating the instances. No updates to + /// the bounds are performed. Use optixAccelBuild to update the bounds. + /// 'traversableHandles' may be zero when the traversables are not relocated + /// (i.e. relocation of an IAS on the source device). + CUdeviceptr traversableHandles; + +} OptixRelocateInputInstanceArray; + +/// Enum to distinguish the different build input types. +/// +/// \see #OptixBuildInput::type +typedef enum OptixBuildInputType +{ + /// Triangle inputs. \see #OptixBuildInputTriangleArray + OPTIX_BUILD_INPUT_TYPE_TRIANGLES = 0x2141, + /// Custom primitive inputs. \see #OptixBuildInputCustomPrimitiveArray + OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES = 0x2142, + /// Instance inputs. \see #OptixBuildInputInstanceArray + OPTIX_BUILD_INPUT_TYPE_INSTANCES = 0x2143, + /// Instance pointer inputs. \see #OptixBuildInputInstanceArray + OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS = 0x2144, + /// Curve inputs. \see #OptixBuildInputCurveArray + OPTIX_BUILD_INPUT_TYPE_CURVES = 0x2145, + /// Sphere inputs. \see #OptixBuildInputSphereArray + OPTIX_BUILD_INPUT_TYPE_SPHERES = 0x2146 +} OptixBuildInputType; + +/// Build inputs. 
+/// +/// All of them support motion and the size of the data arrays needs to match the number of motion steps +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() +typedef struct OptixBuildInput +{ + /// The type of the build input. + OptixBuildInputType type; + + union + { + /// Triangle inputs. + OptixBuildInputTriangleArray triangleArray; + /// Curve inputs. + OptixBuildInputCurveArray curveArray; + /// Sphere inputs. + OptixBuildInputSphereArray sphereArray; + /// Custom primitive inputs. + OptixBuildInputCustomPrimitiveArray customPrimitiveArray; + /// Instance and instance pointer inputs. + OptixBuildInputInstanceArray instanceArray; + char pad[1024]; + }; +} OptixBuildInput; + +/// Relocation inputs. +/// +/// \see #optixAccelRelocate() +typedef struct OptixRelocateInput +{ + /// The type of the build input to relocate. + OptixBuildInputType type; + + union + { + /// Instance and instance pointer inputs. + OptixRelocateInputInstanceArray instanceArray; + + /// Triangle inputs. + OptixRelocateInputTriangleArray triangleArray; + + /// Inputs of any of the other types don't require any relocation data. + }; +} OptixRelocateInput; + +// Some 32-bit tools use this header. This static_assert fails for them because +// the default enum size is 4 bytes, rather than 8, under 32-bit compilers. +// This #ifndef allows them to disable the static assert. + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixBuildInput ) == 8 + 1024, "OptixBuildInput has wrong size" ); +#endif + +/// Flags set on the #OptixInstance::flags. +/// +/// These can be or'ed together to combine multiple flags. +typedef enum OptixInstanceFlags +{ + /// No special flag set + OPTIX_INSTANCE_FLAG_NONE = 0, + + /// Prevent triangles from getting culled due to their orientation. + /// Effectively ignores ray flags + /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. 
+ OPTIX_INSTANCE_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 0, + + /// Flip triangle orientation. + /// This affects front/backface culling as well as the reported face in case of a hit. + OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING = 1u << 1, + + /// Disable anyhit programs for all geometries of the instance. + /// Can be overridden by OPTIX_RAY_FLAG_ENFORCE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. + OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT = 1u << 2, + + /// Enables anyhit programs for all geometries of the instance. + /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT + /// Can be overridden by OPTIX_RAY_FLAG_DISABLE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. + OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT = 1u << 3, + + + /// Force 4-state opacity micromaps to behave as 2-state opacity micromaps during traversal. + OPTIX_INSTANCE_FLAG_FORCE_OPACITY_MICROMAP_2_STATE = 1u << 4, + /// Don't perform opacity micromap query for this instance. GAS must be built with ALLOW_DISABLE_OPACITY_MICROMAPS for this to be valid. + /// This flag overrides FORCE_OPACITY_MICROMAP_2_STATE instance and ray flags. + OPTIX_INSTANCE_FLAG_DISABLE_OPACITY_MICROMAPS = 1u << 5, + +} OptixInstanceFlags; + +/// Instances +/// +/// \see #OptixBuildInputInstanceArray::instances +typedef struct OptixInstance +{ + /// affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[12]; + + /// Application supplied ID. The maximal ID can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID. + unsigned int instanceId; + + /// SBT record offset. + /// In a traversable graph with multiple levels of instance acceleration structure (IAS) objects, offsets are summed together. + /// The maximal SBT offset can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_OFFSET. + unsigned int sbtOffset; + + /// Visibility mask. If rayMask & instanceMask == 0 the instance is culled.
The number of available bits can be + /// queried using OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK. + unsigned int visibilityMask; + + /// Any combination of OptixInstanceFlags is allowed. + unsigned int flags; + + /// Set with an OptixTraversableHandle. + OptixTraversableHandle traversableHandle; + + /// round up to 80-byte, to ensure 16-byte alignment + unsigned int pad[2]; +} OptixInstance; + +/// Builder Options +/// +/// Used for #OptixAccelBuildOptions::buildFlags. Can be or'ed together. +typedef enum OptixBuildFlags +{ + /// No special flags set. + OPTIX_BUILD_FLAG_NONE = 0, + + /// Allow updating the build with new vertex positions with subsequent calls to + /// optixAccelBuild. + OPTIX_BUILD_FLAG_ALLOW_UPDATE = 1u << 0, + + OPTIX_BUILD_FLAG_ALLOW_COMPACTION = 1u << 1, + + /// This flag is mutually exclusive with OPTIX_BUILD_FLAG_PREFER_FAST_BUILD. + OPTIX_BUILD_FLAG_PREFER_FAST_TRACE = 1u << 2, + + /// This flag is mutually exclusive with OPTIX_BUILD_FLAG_PREFER_FAST_TRACE. + OPTIX_BUILD_FLAG_PREFER_FAST_BUILD = 1u << 3, + + /// Allow random access to build input vertices + /// See optixGetTriangleVertexData + /// optixGetLinearCurveVertexData + /// optixGetQuadraticBSplineVertexData + /// optixGetCubicBSplineVertexData + /// optixGetCatmullRomVertexData + /// optixGetRibbonVertexData + /// optixGetRibbonNormal + /// optixGetSphereData + OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS = 1u << 4, + + /// Allow random access to instances + /// See optixGetInstanceTraversableFromIAS + OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS = 1u << 5, + + /// Support updating the opacity micromap array and opacity micromap indices on refits. + /// May increase AS size and may have a small negative impact on traversal performance. + /// If this flag is absent, all opacity micromap inputs must remain unchanged between the initial AS builds and their subsequent refits. 
+ OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE = 1u << 6, + + /// If enabled, any instances referencing this GAS are allowed to disable the opacity micromap test through the DISABLE_OPACITY_MICROMAPS instance flag. + /// Note that the GAS will not be optimized for the attached opacity micromap Arrays if this flag is set, + /// which may result in reduced traversal performance. + OPTIX_BUILD_FLAG_ALLOW_DISABLE_OPACITY_MICROMAPS = 1u << 7, +} OptixBuildFlags; + + +/// Flags defining behavior of opacity micromaps in an opacity micromap array. +typedef enum OptixOpacityMicromapFlags +{ + OPTIX_OPACITY_MICROMAP_FLAG_NONE = 0, + + /// This flag is mutually exclusive with OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD. + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE = 1 << 0, -#ifndef __optix_optix_types_h__ -#define __optix_optix_types_h__ + /// This flag is mutually exclusive with OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE. + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD = 1 << 1, +} OptixOpacityMicromapFlags; -// clang-format off -#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) -# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ -# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ +/// Opacity micromap descriptor. +typedef struct OptixOpacityMicromapDesc +{ + /// Byte offset to opacity micromap in data input buffer of opacity micromap array build + unsigned int byteOffset; + /// Number of micro-triangles is 4^level. Valid levels are [0, 12] + unsigned short subdivisionLevel; + /// OptixOpacityMicromapFormat + unsigned short format; +} OptixOpacityMicromapDesc; + +/// Opacity micromap histogram entry. +/// Specifies how many opacity micromaps of a specific type are input to the opacity micromap array build. +/// Note that while this is similar to OptixOpacityMicromapUsageCount, the histogram entry specifies how many opacity micromaps +/// of a specific type are combined into an opacity micromap array.
+typedef struct OptixOpacityMicromapHistogramEntry +{ + /// Number of opacity micromaps with the format and subdivision level that are input to the opacity micromap array build. + unsigned int count; + /// Number of micro-triangles is 4^level. Valid levels are [0, 12]. + unsigned int subdivisionLevel; + /// Opacity micromap format. + OptixOpacityMicromapFormat format; +} OptixOpacityMicromapHistogramEntry; + +/// Inputs to opacity micromap array construction. +typedef struct OptixOpacityMicromapArrayBuildInput +{ + /// Applies to all opacity micromaps in array. + unsigned int flags; + + /// 128B aligned base pointer for raw opacity micromap input data. + CUdeviceptr inputBuffer; + + /// One OptixOpacityMicromapDesc entry per opacity micromap. + /// This device pointer must be a multiple of OPTIX_OPACITY_MICROMAP_DESC_BYTE_ALIGNMENT. + CUdeviceptr perMicromapDescBuffer; + + /// Stride between OptixOpacityMicromapDescs in perMicromapDescBuffer. + /// If set to zero, the opacity micromap descriptors are assumed to be tightly packed and the stride is assumed to be sizeof( OptixOpacityMicromapDesc ). + /// This stride must be a multiple of OPTIX_OPACITY_MICROMAP_DESC_BYTE_ALIGNMENT. + unsigned int perMicromapDescStrideInBytes; + + /// Number of OptixOpacityMicromapHistogramEntry. + unsigned int numMicromapHistogramEntries; + /// Histogram over opacity micromaps of input format and subdivision combinations. + /// Counts of entries with equal format and subdivision combination (duplicates) are added together. + const OptixOpacityMicromapHistogramEntry* micromapHistogramEntries; +} OptixOpacityMicromapArrayBuildInput; + +/// Conservative memory requirements for building an opacity/displacement micromap array +typedef struct OptixMicromapBufferSizes +{ + size_t outputSizeInBytes; + size_t tempSizeInBytes; +} OptixMicromapBufferSizes; + +/// Buffer inputs for opacity/displacement micromap array builds.
+typedef struct OptixMicromapBuffers +{ + /// Output buffer + CUdeviceptr output; + /// Output buffer size + size_t outputSizeInBytes; + /// Temp buffer + CUdeviceptr temp; + /// Temp buffer size + size_t tempSizeInBytes; +} OptixMicromapBuffers; + + +/// Enum to specify the acceleration build operation. +/// +/// Used in OptixAccelBuildOptions, which is then passed to optixAccelBuild and +/// optixAccelComputeMemoryUsage, this enum indicates whether to do a build or an update +/// of the acceleration structure. +/// +/// Acceleration structure updates utilize the same acceleration structure, but with +/// updated bounds. Updates are typically much faster than builds, however, large +/// perturbations can degrade the quality of the acceleration structure. +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild(), #OptixAccelBuildOptions +typedef enum OptixBuildOperation +{ + /// Perform a full build operation + OPTIX_BUILD_OPERATION_BUILD = 0x2161, + /// Perform an update using new bounds + OPTIX_BUILD_OPERATION_UPDATE = 0x2162, +} OptixBuildOperation; + +/// Enum to specify motion flags. +/// +/// \see #OptixMotionOptions::flags. +typedef enum OptixMotionFlags +{ + OPTIX_MOTION_FLAG_NONE = 0, + OPTIX_MOTION_FLAG_START_VANISH = 1u << 0, + OPTIX_MOTION_FLAG_END_VANISH = 1u << 1 +} OptixMotionFlags; + +/// Motion options +/// +/// \see #OptixAccelBuildOptions::motionOptions, #OptixMatrixMotionTransform::motionOptions, +/// #OptixSRTMotionTransform::motionOptions +typedef struct OptixMotionOptions +{ + /// If numKeys > 1, motion is enabled. timeBegin, + /// timeEnd and flags are all ignored when motion is disabled. + unsigned short numKeys; + + /// Combinations of #OptixMotionFlags + unsigned short flags; + + /// Point in time where motion starts. Must be less than timeEnd. + float timeBegin; + + /// Point in time where motion ends. Must be greater than timeBegin. + float timeEnd; +} OptixMotionOptions; + +/// Build options for acceleration structures.
+/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() +typedef struct OptixAccelBuildOptions +{ + /// Combinations of OptixBuildFlags + unsigned int buildFlags; + + /// If OPTIX_BUILD_OPERATION_UPDATE the output buffer is assumed to contain the result + /// of a full build with OPTIX_BUILD_FLAG_ALLOW_UPDATE set and using the same number of + /// primitives. It is updated incrementally to reflect the current position of the + /// primitives. + /// If a BLAS has been built with OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE, new opacity micromap arrays + /// and opacity micromap indices may be provided to the refit. + OptixBuildOperation operation; + + /// Options for motion. + OptixMotionOptions motionOptions; +} OptixAccelBuildOptions; + +/// Struct for querying builder allocation requirements. +/// +/// Once queried the sizes should be used to allocate device memory of at least these sizes. +/// +/// \see #optixAccelComputeMemoryUsage() +typedef struct OptixAccelBufferSizes +{ + /// The size in bytes required for the outputBuffer parameter to optixAccelBuild when + /// doing a build (OPTIX_BUILD_OPERATION_BUILD). + size_t outputSizeInBytes; + + /// The size in bytes required for the tempBuffer parameter to optixAccelBuild when + /// doing a build (OPTIX_BUILD_OPERATION_BUILD). + size_t tempSizeInBytes; + + /// The size in bytes required for the tempBuffer parameter to optixAccelBuild + /// when doing an update (OPTIX_BUILD_OPERATION_UPDATE). This value can be different + /// than tempSizeInBytes used for a full build. Only non-zero if + /// OPTIX_BUILD_FLAG_ALLOW_UPDATE flag is set in OptixAccelBuildOptions. + size_t tempUpdateSizeInBytes; +} OptixAccelBufferSizes; + +/// Properties which can be emitted during acceleration structure build. +/// +/// \see #OptixAccelEmitDesc::type. +typedef enum OptixAccelPropertyType +{ + /// Size of a compacted acceleration structure. The device pointer points to a uint64.
+ OPTIX_PROPERTY_TYPE_COMPACTED_SIZE = 0x2181, + + /// OptixAabb * numMotionSteps + OPTIX_PROPERTY_TYPE_AABBS = 0x2182, +} OptixAccelPropertyType; + +/// Specifies a type and output destination for emitted post-build properties. +/// +/// \see #optixAccelBuild() +typedef struct OptixAccelEmitDesc +{ + /// Output buffer for the properties + CUdeviceptr result; + + /// Requested property + OptixAccelPropertyType type; +} OptixAccelEmitDesc; + +/// Used to store information related to relocation of optix data structures. +/// +/// \see #optixOpacityMicromapArrayGetRelocationInfo(), #optixOpacityMicromapArrayRelocate(), +/// #optixAccelGetRelocationInfo(), #optixAccelRelocate(), #optixCheckRelocationCompatibility() +typedef struct OptixRelocationInfo +{ + /// Opaque data, used internally, should not be modified + unsigned long long info[4]; +} OptixRelocationInfo; + +/// Static transform +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixStaticTransform +{ + /// The traversable transformed by this transformation + OptixTraversableHandle child; + + /// Padding to make the transformations 16 byte aligned + unsigned int pad[2]; + + /// Affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[12]; + + /// Affine world-to-object transformation as 3x4 matrix in row-major layout + /// Must be the inverse of the transform matrix + float invTransform[12]; +} OptixStaticTransform; + +/// Represents a matrix motion transformation. +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. +/// +/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its transform member. +/// The following example shows how to create instances for an arbitrary number N of motion keys: +/// +/// \code +/// float matrixData[N][12]; +/// ... 
// setup matrixData +/// +/// size_t transformSizeInBytes = sizeof( OptixMatrixMotionTransform ) + ( N-2 ) * 12 * sizeof( float ); +/// OptixMatrixMotionTransform* matrixMotionTransform = (OptixMatrixMotionTransform*) malloc( transformSizeInBytes ); +/// memset( matrixMotionTransform, 0, transformSizeInBytes ); +/// +/// ... // setup other members of matrixMotionTransform +/// matrixMotionTransform->motionOptions.numKeys = N; +/// memcpy( matrixMotionTransform->transform, matrixData, N * 12 * sizeof( float ) ); +/// +/// ... // copy matrixMotionTransform to device memory +/// free( matrixMotionTransform ) +/// \endcode +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixMatrixMotionTransform +{ + /// The traversable that is transformed by this transformation + OptixTraversableHandle child; + + /// The motion options for this transformation. + /// Must have at least two motion keys. + OptixMotionOptions motionOptions; + + /// Padding to make the transformation 16 byte aligned + unsigned int pad[3]; + + /// Affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[2][12]; +} OptixMatrixMotionTransform; + +/// Represents an SRT transformation. +/// +/// An SRT transformation can represent a smooth rotation with fewer motion keys than a matrix transformation. Each +/// motion key is constructed from elements taken from a matrix S, a quaternion R, and a translation T. +/// +/// The scaling matrix +/// \f$S = \begin{bmatrix} sx & a & b & pvx \\ 0 & sy & c & pvy \\ 0 & 0 & sz & pvz \end{bmatrix}\f$ +// [ sx a b pvx ] +// S = [ 0 sy c pvy ] +// [ 0 0 sz pvz ] +/// defines an affine transformation that can include scale, shear, and a translation. +/// The translation allows to define the pivot point for the subsequent rotation.
+/// +/// The quaternion R = [ qx, qy, qz, qw ] describes a rotation with angular component qw = cos(theta/2) and other +/// components [ qx, qy, qz ] = sin(theta/2) * [ ax, ay, az ] where the axis [ ax, ay, az ] is normalized. +/// +/// The translation matrix +/// \f$T = \begin{bmatrix} 1 & 0 & 0 & tx \\ 0 & 1 & 0 & ty \\ 0 & 0 & 1 & tz \end{bmatrix}\f$ +// [ 1 0 0 tx ] +// T = [ 0 1 0 ty ] +// [ 0 0 1 tz ] +/// defines another translation that is applied after the rotation. Typically, this translation includes +/// the inverse translation from the matrix S to reverse the translation for the pivot point for R. +/// +/// To obtain the effective transformation at time t, the elements of the components of S, R, and T will be interpolated +/// linearly. The components are then multiplied to obtain the combined transformation C = T * R * S. The transformation +/// C is the effective object-to-world transformations at time t, and C^(-1) is the effective world-to-object +/// transformation at time t. +/// +/// \see #OptixSRTMotionTransform::srtData, #optixConvertPointerToTraversableHandle() +typedef struct OptixSRTData +{ + /// \name Parameters describing the SRT transformation + /// @{ + float sx, a, b, pvx, sy, c, pvy, sz, pvz, qx, qy, qz, qw, tx, ty, tz; + /// @} +} OptixSRTData; + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixSRTData ) == 16 * 4, "OptixSRTData has wrong size" ); #endif -#include "optix_7_types.h" -#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ ) -# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ -# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ + +/// Represents an SRT motion transformation. +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. +/// +/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its srtData member. 
+/// The following example shows how to create instances for an arbitrary number N of motion keys: +/// +/// \code +/// OptixSRTData srtData[N]; +/// ... // setup srtData +/// +/// size_t transformSizeInBytes = sizeof( OptixSRTMotionTransform ) + ( N-2 ) * sizeof( OptixSRTData ); +/// OptixSRTMotionTransform* srtMotionTransform = (OptixSRTMotionTransform*) malloc( transformSizeInBytes ); +/// memset( srtMotionTransform, 0, transformSizeInBytes ); +/// +/// ... // setup other members of srtMotionTransform +/// srtMotionTransform->motionOptions.numKeys = N; +/// memcpy( srtMotionTransform->srtData, srtData, N * sizeof( OptixSRTData ) ); +/// +/// ... // copy srtMotionTransform to device memory +/// free( srtMotionTransform ) +/// \endcode +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixSRTMotionTransform +{ + /// The traversable transformed by this transformation + OptixTraversableHandle child; + + /// The motion options for this transformation + /// Must have at least two motion keys. + OptixMotionOptions motionOptions; + + /// Padding to make the SRT data 16 byte aligned + unsigned int pad[3]; + + /// The actual SRT data describing the transformation + OptixSRTData srtData[2]; +} OptixSRTMotionTransform; + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixSRTMotionTransform ) == 8 + 12 + 12 + 2 * 16 * 4, "OptixSRTMotionTransform has wrong size" ); +#endif + +/// Traversable Handles +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef enum OptixTraversableType +{ + /// Static transforms. \see #OptixStaticTransform + OPTIX_TRAVERSABLE_TYPE_STATIC_TRANSFORM = 0x21C1, + /// Matrix motion transform. \see #OptixMatrixMotionTransform + OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM = 0x21C2, + /// SRT motion transform. 
\see #OptixSRTMotionTransform + OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM = 0x21C3, +} OptixTraversableType; + +/// Pixel formats used by the denoiser. +/// +/// \see #OptixImage2D::format +typedef enum OptixPixelFormat +{ + OPTIX_PIXEL_FORMAT_HALF1 = 0x220a, ///< one half + OPTIX_PIXEL_FORMAT_HALF2 = 0x2207, ///< two halfs, XY + OPTIX_PIXEL_FORMAT_HALF3 = 0x2201, ///< three halfs, RGB + OPTIX_PIXEL_FORMAT_HALF4 = 0x2202, ///< four halfs, RGBA + OPTIX_PIXEL_FORMAT_FLOAT1 = 0x220b, ///< one float + OPTIX_PIXEL_FORMAT_FLOAT2 = 0x2208, ///< two floats, XY + OPTIX_PIXEL_FORMAT_FLOAT3 = 0x2203, ///< three floats, RGB + OPTIX_PIXEL_FORMAT_FLOAT4 = 0x2204, ///< four floats, RGBA + OPTIX_PIXEL_FORMAT_UCHAR3 = 0x2205, ///< three unsigned chars, RGB + OPTIX_PIXEL_FORMAT_UCHAR4 = 0x2206, ///< four unsigned chars, RGBA + OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER = 0x2209 ///< internal format +} OptixPixelFormat; + +/// Image descriptor used by the denoiser. +/// +/// \see #optixDenoiserInvoke(), #optixDenoiserComputeIntensity() +typedef struct OptixImage2D +{ + /// Pointer to the actual pixel data. + CUdeviceptr data; + /// Width of the image (in pixels) + unsigned int width; + /// Height of the image (in pixels) + unsigned int height; + /// Stride between subsequent rows of the image (in bytes). + unsigned int rowStrideInBytes; + /// Stride between subsequent pixels of the image (in bytes). + /// If set to 0, dense packing (no gaps) is assumed. + /// For pixel format OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER it must be set to + /// OptixDenoiserSizes::internalGuideLayerPixelSizeInBytes. + unsigned int pixelStrideInBytes; + /// Pixel format. + OptixPixelFormat format; +} OptixImage2D; + +/// Model kind used by the denoiser. +/// +/// \see #optixDenoiserCreate +typedef enum OptixDenoiserModelKind +{ + /// Use the built-in model appropriate for low dynamic range input. 
+ OPTIX_DENOISER_MODEL_KIND_LDR = 0x2322, + + /// Use the built-in model appropriate for high dynamic range input. + OPTIX_DENOISER_MODEL_KIND_HDR = 0x2323, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs + OPTIX_DENOISER_MODEL_KIND_AOV = 0x2324, + + /// Use the built-in model appropriate for high dynamic range input, temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL = 0x2325, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV = 0x2326, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x + OPTIX_DENOISER_MODEL_KIND_UPSCALE2X = 0x2327, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x, + /// temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X = 0x2328 +} OptixDenoiserModelKind; + +/// Options used by the denoiser +/// +/// \see #optixDenoiserCreate() +typedef struct OptixDenoiserOptions +{ + // if nonzero, albedo image must be given in OptixDenoiserGuideLayer + unsigned int guideAlbedo; + + // if nonzero, normal image must be given in OptixDenoiserGuideLayer + unsigned int guideNormal; +} OptixDenoiserOptions; + +/// Guide layer for the denoiser +/// +/// \see #optixDenoiserInvoke() +typedef struct OptixDenoiserGuideLayer +{ + // albedo/bsdf image + OptixImage2D albedo; + + // normal vector image (2d or 3d pixel format) + OptixImage2D normal; + + // 2d flow image, pixel flow from previous to current frame for each pixel + OptixImage2D flow; + + // Internal images used in temporal AOV denoising modes, + // pixel format OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER + OptixImage2D previousOutputInternalGuideLayer; + OptixImage2D outputInternalGuideLayer; + + // 1d image specifying how trustworthy the flow vector at x,y position in + // OptixDenoiserGuideLayer::flow is. 
Range 0..1 (low->high trustworthiness). + // Ignored if data pointer in the image is zero. + OptixImage2D flowTrustworthiness; + +} OptixDenoiserGuideLayer; + +/// AOV type used by the denoiser +/// +typedef enum OptixDenoiserAOVType +{ + /// Unspecified AOV type + OPTIX_DENOISER_AOV_TYPE_NONE = 0, + + OPTIX_DENOISER_AOV_TYPE_BEAUTY = 0x7000, + OPTIX_DENOISER_AOV_TYPE_SPECULAR = 0x7001, + OPTIX_DENOISER_AOV_TYPE_REFLECTION = 0x7002, + OPTIX_DENOISER_AOV_TYPE_REFRACTION = 0x7003, + OPTIX_DENOISER_AOV_TYPE_DIFFUSE = 0x7004 + +} OptixDenoiserAOVType; + +/// Input/Output layers for the denoiser +/// +/// \see #optixDenoiserInvoke() +typedef struct OptixDenoiserLayer +{ + // input image (beauty or AOV) + OptixImage2D input; + + // denoised output image from previous frame if temporal model kind selected + OptixImage2D previousOutput; + + // denoised output for given input + OptixImage2D output; + + // Type of AOV, used in temporal AOV modes as a hint to improve image quality. + OptixDenoiserAOVType type; +} OptixDenoiserLayer; + +/// Various parameters used by the denoiser +/// +/// \see #optixDenoiserInvoke() +/// \see #optixDenoiserComputeIntensity() +/// \see #optixDenoiserComputeAverageColor() +typedef enum OptixDenoiserAlphaMode +{ + /// Copy alpha (if present) from input layer, no denoising. + OPTIX_DENOISER_ALPHA_MODE_COPY = 0, + + /// Denoise alpha separately. With AOV model kinds, treat alpha like an AOV. + OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV = 1, + + /// With AOV model kinds, full denoise pass with alpha. + /// This is slower than OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV. + OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS = 2 +} OptixDenoiserAlphaMode; +typedef struct OptixDenoiserParams +{ + /// alpha denoise mode + OptixDenoiserAlphaMode denoiseAlpha; + + /// average log intensity of input image (default null pointer). points to a single float. + /// with the default (null pointer) denoised results will not be optimal for very dark or + /// bright input images. 
+ CUdeviceptr hdrIntensity; + + /// blend factor. + /// If set to 0 the output is 100% of the denoised input. If set to 1, the output is 100% of + /// the unmodified input. Values between 0 and 1 will linearly interpolate between the denoised + /// and unmodified input. + float blendFactor; + + /// this parameter is used when the OPTIX_DENOISER_MODEL_KIND_AOV model kind is set. + /// average log color of input image, separate for RGB channels (default null pointer). + /// points to three floats. with the default (null pointer) denoised results will not be + /// optimal. + CUdeviceptr hdrAverageColor; + + /// In temporal modes this parameter must be set to 1 if previous layers (e.g. + /// previousOutputInternalGuideLayer) contain valid data. This is the case in the + /// second and subsequent frames of a sequence (for example after a change of camera + /// angle). In the first frame of such a sequence this parameter must be set to 0. + unsigned int temporalModeUsePreviousLayers; +} OptixDenoiserParams; + +/// Various sizes related to the denoiser. +/// +/// \see #optixDenoiserComputeMemoryResources() +typedef struct OptixDenoiserSizes +{ + /// Size of state memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + size_t stateSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + /// Overlap added to dimensions passed to #optixDenoiserComputeMemoryResources. + size_t withOverlapScratchSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + /// No overlap added. + size_t withoutOverlapScratchSizeInBytes; + + /// Overlap on all four tile sides. + unsigned int overlapWindowSizeInPixels; + + /// Size of scratch memory passed to #optixDenoiserComputeAverageColor. + /// The size is independent of the tile/image resolution. + size_t computeAverageColorSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserComputeIntensity. 
+ /// The size is independent of the tile/image resolution. + size_t computeIntensitySizeInBytes; + + /// Number of bytes for each pixel in internal guide layers. + size_t internalGuideLayerPixelSizeInBytes; +} OptixDenoiserSizes; + +/// Ray flags passed to the device function #optixTrace(). These affect the behavior of +/// traversal per invocation. +/// +/// \see #optixTrace() +typedef enum OptixRayFlags +{ + /// No change from the behavior configured for the individual AS. + OPTIX_RAY_FLAG_NONE = 0u, + + /// Disables anyhit programs for the ray. + /// Overrides OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. + OPTIX_RAY_FLAG_DISABLE_ANYHIT = 1u << 0, + + /// Forces anyhit program execution for the ray. + /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT as well as OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_DISABLE_ANYHIT, + /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. + OPTIX_RAY_FLAG_ENFORCE_ANYHIT = 1u << 1, + + /// Terminates the ray after the first hit and executes + /// the closesthit program of that hit. + OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT = 1u << 2, + + /// Disables closesthit programs for the ray, but still executes miss program in case of a miss. + OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT = 1u << 3, + + /// Do not intersect triangle back faces + /// (respects a possible face change due to instance flag + /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 1u << 4, + + /// Do not intersect triangle front faces + /// (respects a possible face change due to instance flag + /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES. 
+ OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 1u << 5, + + /// Do not intersect geometry which disables anyhit programs + /// (due to setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or + /// instance flag OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT, + /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. + OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT = 1u << 6, + + /// Do not intersect geometry which has an enabled anyhit program + /// (due to not setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or + /// setting instance flag OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, + /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. + OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT = 1u << 7, + + /// Force 4-state opacity micromaps to behave as 2-state opacity micromaps during traversal. + OPTIX_RAY_FLAG_FORCE_OPACITY_MICROMAP_2_STATE = 1u << 10, +} OptixRayFlags; + +/// Transform +/// +/// OptixTransformType is used by the device function #optixGetTransformTypeFromHandle() to +/// determine the type of the OptixTraversableHandle returned from +/// optixGetTransformListHandle(). +typedef enum OptixTransformType +{ + OPTIX_TRANSFORM_TYPE_NONE = 0, ///< Not a transformation + OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM = 1, ///< \see #OptixStaticTransform + OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM = 2, ///< \see #OptixMatrixMotionTransform + OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM = 3, ///< \see #OptixSRTMotionTransform + OPTIX_TRANSFORM_TYPE_INSTANCE = 4, ///< \see #OptixInstance +} OptixTransformType; + +/// Specifies the set of valid traversable graphs that may be +/// passed to invocation of #optixTrace(). Flags may be bitwise combined. +typedef enum OptixTraversableGraphFlags +{ + /// Used to signal that any traversable graph is valid. + /// This flag is mutually exclusive with all other flags.
+ OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY = 0, + + /// Used to signal that a traversable graph of a single Geometry Acceleration + /// Structure (GAS) without any transforms is valid. This flag may be combined with + /// other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. + OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS = 1u << 0, + + /// Used to signal that a traversable graph of a single Instance Acceleration + /// Structure (IAS) directly connected to Geometry Acceleration Structure (GAS) + /// traversables without transform traversables in between is valid. This flag may + /// be combined with other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. + OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING = 1u << 1, +} OptixTraversableGraphFlags; + +/// Optimization levels +/// +/// \see #OptixModuleCompileOptions::optLevel +typedef enum OptixCompileOptimizationLevel +{ + /// Default is to run all optimizations + OPTIX_COMPILE_OPTIMIZATION_DEFAULT = 0, + /// No optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_0 = 0x2340, + /// Some optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_1 = 0x2341, + /// Most optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_2 = 0x2342, + /// All optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_3 = 0x2343, +} OptixCompileOptimizationLevel; + +/// Debug levels +/// +/// \see #OptixModuleCompileOptions::debugLevel +typedef enum OptixCompileDebugLevel +{ + /// Default currently is minimal + OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT = 0, + /// No debug information + OPTIX_COMPILE_DEBUG_LEVEL_NONE = 0x2350, + /// Generate information that does not impact performance. + /// Note this replaces OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO. + OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL = 0x2351, + /// Generate some debug information with slight performance cost + OPTIX_COMPILE_DEBUG_LEVEL_MODERATE = 0x2353, + /// Generate full debug information + OPTIX_COMPILE_DEBUG_LEVEL_FULL = 0x2352, +} OptixCompileDebugLevel; + +/// Module compilation state. 
+/// +/// \see #optixModuleGetCompilationState(), #optixModuleCreateWithTasks() +typedef enum OptixModuleCompileState +{ + /// No OptixTask objects have started + OPTIX_MODULE_COMPILE_STATE_NOT_STARTED = 0x2360, + + /// Started, but not all OptixTask objects have completed. No detected failures. + OPTIX_MODULE_COMPILE_STATE_STARTED = 0x2361, + + /// Not all OptixTask objects have completed, but at least one has failed. + OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE = 0x2362, + + /// All OptixTask objects have completed, and at least one has failed + OPTIX_MODULE_COMPILE_STATE_FAILED = 0x2363, + + /// All OptixTask objects have completed. The OptixModule is ready to be used. + OPTIX_MODULE_COMPILE_STATE_COMPLETED = 0x2364, +} OptixModuleCompileState; + + + +/// Struct for specifying specializations for pipelineParams as specified in +/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. +/// +/// The bound values are supposed to represent a constant value in the +/// pipelineParams. OptiX will attempt to locate all loads from the pipelineParams and +/// correlate them to the appropriate bound value, but there are cases where OptiX cannot +/// safely or reliably do this. For example if the pointer to the pipelineParams is passed +/// as an argument to a non-inline function or the offset of the load to the +/// pipelineParams cannot be statically determined (e.g. accessed in a loop). No module +/// should rely on the value being specialized in order to work correctly. The values in +/// the pipelineParams specified on optixLaunch should match the bound value. If +/// validation mode is enabled on the context, OptiX will verify that the bound values +/// specified match the values in pipelineParams specified to optixLaunch. +/// +/// These values are compiled in to the module as constants. Once the constants are +/// inserted into the code, an optimization pass will be run that will attempt to +/// propagate the constants and remove unreachable code. 
+/// +/// If caching is enabled, changes in these values will result in newly compiled modules. +/// +/// The pipelineParamOffset and sizeInBytes must be within the bounds of the +/// pipelineParams variable. OPTIX_ERROR_INVALID_VALUE will be returned from +/// optixModuleCreate otherwise. +/// +/// If more than one bound value overlaps or the size of a bound value is equal to 0, +/// an OPTIX_ERROR_INVALID_VALUE will be returned from optixModuleCreate. +/// +/// The same set of bound values do not need to be used for all modules in a pipeline, but +/// overlapping values between modules must have the same value. +/// OPTIX_ERROR_INVALID_VALUE will be returned from optixPipelineCreate otherwise. +/// +/// \see #OptixModuleCompileOptions +typedef struct OptixModuleCompileBoundValueEntry { + size_t pipelineParamOffsetInBytes; + size_t sizeInBytes; + const void* boundValuePtr; + const char* annotation; // optional string to display, set to 0 if unused. If unused, + // OptiX will report the annotation as "No annotation" +} OptixModuleCompileBoundValueEntry; + +/// Payload type identifiers. +typedef enum OptixPayloadTypeID { + OPTIX_PAYLOAD_TYPE_DEFAULT = 0, + OPTIX_PAYLOAD_TYPE_ID_0 = (1 << 0u), + OPTIX_PAYLOAD_TYPE_ID_1 = (1 << 1u), + OPTIX_PAYLOAD_TYPE_ID_2 = (1 << 2u), + OPTIX_PAYLOAD_TYPE_ID_3 = (1 << 3u), + OPTIX_PAYLOAD_TYPE_ID_4 = (1 << 4u), + OPTIX_PAYLOAD_TYPE_ID_5 = (1 << 5u), + OPTIX_PAYLOAD_TYPE_ID_6 = (1 << 6u), + OPTIX_PAYLOAD_TYPE_ID_7 = (1 << 7u) +} OptixPayloadTypeID; + +/// Semantic flags for a single payload word. +/// +/// Used to specify the semantics of a payload word per shader type. +/// "read": Shader of this type may read the payload word. +/// "write": Shader of this type may write the payload word. +/// +/// "trace_caller_write": Shaders may consume the value of the payload word passed to optixTrace by the caller. +/// "trace_caller_read": The caller to optixTrace may read the payload word after the call to optixTrace. 
+/// +/// Semantics can be bitwise combined. +/// Combining "read" and "write" is equivalent to specifying "read_write". +/// A payload needs to be writable by the caller or at least one shader type. +/// A payload needs to be readable by the caller or at least one shader type after being writable. +typedef enum OptixPayloadSemantics +{ + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ = 1u << 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE = 2u << 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE = 3u << 0, + + OPTIX_PAYLOAD_SEMANTICS_CH_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_CH_READ = 1u << 2, + OPTIX_PAYLOAD_SEMANTICS_CH_WRITE = 2u << 2, + OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE = 3u << 2, + + OPTIX_PAYLOAD_SEMANTICS_MS_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_MS_READ = 1u << 4, + OPTIX_PAYLOAD_SEMANTICS_MS_WRITE = 2u << 4, + OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE = 3u << 4, + + OPTIX_PAYLOAD_SEMANTICS_AH_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_AH_READ = 1u << 6, + OPTIX_PAYLOAD_SEMANTICS_AH_WRITE = 2u << 6, + OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE = 3u << 6, + + OPTIX_PAYLOAD_SEMANTICS_IS_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_IS_READ = 1u << 8, + OPTIX_PAYLOAD_SEMANTICS_IS_WRITE = 2u << 8, + OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE = 3u << 8, +} OptixPayloadSemantics; + +/// Specifies a single payload type +typedef struct OptixPayloadType +{ + /// The number of 32b words the payload of this type holds + unsigned int numPayloadValues; + + /// Points to host array of payload word semantics, size must match numPayloadValues + const unsigned int *payloadSemantics; +} OptixPayloadType; + +/// Compilation options for module +/// +/// \see #optixModuleCreate() +typedef struct OptixModuleCompileOptions +{ + /// Maximum number of registers allowed when compiling to SASS. + /// Set to 0 for no explicit limit. May vary within a pipeline. + int maxRegisterCount; + + /// Optimization level. May vary within a pipeline. 
+ OptixCompileOptimizationLevel optLevel; + + /// Generate debug information. + OptixCompileDebugLevel debugLevel; + + /// Ignored if numBoundValues is set to 0 + const OptixModuleCompileBoundValueEntry* boundValues; + + /// set to 0 if unused + unsigned int numBoundValues; + + /// The number of different payload types available for compilation. + /// Must be zero if OptixPipelineCompileOptions::numPayloadValues is not zero. + unsigned int numPayloadTypes; + + /// Points to host array of payload type definitions, size must match numPayloadTypes + OptixPayloadType *payloadTypes; + +} OptixModuleCompileOptions; + +/// Distinguishes different kinds of program groups. +typedef enum OptixProgramGroupKind +{ + /// Program group containing a raygen (RG) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::raygen + OPTIX_PROGRAM_GROUP_KIND_RAYGEN = 0x2421, + + /// Program group containing a miss (MS) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::miss + OPTIX_PROGRAM_GROUP_KIND_MISS = 0x2422, + + /// Program group containing an exception (EX) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::exception + OPTIX_PROGRAM_GROUP_KIND_EXCEPTION = 0x2423, + + /// Program group containing an intersection (IS), any hit (AH), and/or closest hit (CH) program + /// \see #OptixProgramGroupHitgroup, #OptixProgramGroupDesc::hitgroup + OPTIX_PROGRAM_GROUP_KIND_HITGROUP = 0x2424, + + /// Program group containing a direct (DC) or continuation (CC) callable program + /// \see #OptixProgramGroupCallables, #OptixProgramGroupDesc::callables + OPTIX_PROGRAM_GROUP_KIND_CALLABLES = 0x2425 +} OptixProgramGroupKind; + +/// Flags for program groups +typedef enum OptixProgramGroupFlags +{ + /// Currently there are no flags + OPTIX_PROGRAM_GROUP_FLAGS_NONE = 0 +} OptixProgramGroupFlags; + +/// Program group representing a single module. +/// +/// Used for raygen, miss, and exception programs. 
In case of raygen and exception programs, module and entry +/// function name need to be valid. For miss programs, module and entry function name might both be \c nullptr. +/// +/// \see #OptixProgramGroupDesc::raygen, #OptixProgramGroupDesc::miss, #OptixProgramGroupDesc::exception +typedef struct OptixProgramGroupSingleModule +{ + /// Module holding single program. + OptixModule module; + /// Entry function name of the single program. + const char* entryFunctionName; +} OptixProgramGroupSingleModule; + +/// Program group representing the hitgroup. +/// +/// For each of the three program types, module and entry function name might both be \c nullptr. +/// +/// \see #OptixProgramGroupDesc::hitgroup +typedef struct OptixProgramGroupHitgroup +{ + /// Module holding the closest hit (CH) program. + OptixModule moduleCH; + /// Entry function name of the closest hit (CH) program. + const char* entryFunctionNameCH; + /// Module holding the any hit (AH) program. + OptixModule moduleAH; + /// Entry function name of the any hit (AH) program. + const char* entryFunctionNameAH; + /// Module holding the intersection (IS) program. + OptixModule moduleIS; + /// Entry function name of the intersection (IS) program. + const char* entryFunctionNameIS; +} OptixProgramGroupHitgroup; + +/// Program group representing callables. +/// +/// Module and entry function name need to be valid for at least one of the two callables. +/// +/// \see #OptixProgramGroupDesc::callables +typedef struct OptixProgramGroupCallables +{ + /// Module holding the direct callable (DC) program. + OptixModule moduleDC; + /// Entry function name of the direct callable (DC) program. + const char* entryFunctionNameDC; + /// Module holding the continuation callable (CC) program. + OptixModule moduleCC; + /// Entry function name of the continuation callable (CC) program. + const char* entryFunctionNameCC; +} OptixProgramGroupCallables; + +/// Descriptor for program groups. 
+typedef struct OptixProgramGroupDesc +{ + /// The kind of program group. + OptixProgramGroupKind kind; + + /// See #OptixProgramGroupFlags + unsigned int flags; + + union + { + /// \see #OPTIX_PROGRAM_GROUP_KIND_RAYGEN + OptixProgramGroupSingleModule raygen; + /// \see #OPTIX_PROGRAM_GROUP_KIND_MISS + OptixProgramGroupSingleModule miss; + /// \see #OPTIX_PROGRAM_GROUP_KIND_EXCEPTION + OptixProgramGroupSingleModule exception; + /// \see #OPTIX_PROGRAM_GROUP_KIND_CALLABLES + OptixProgramGroupCallables callables; + /// \see #OPTIX_PROGRAM_GROUP_KIND_HITGROUP + OptixProgramGroupHitgroup hitgroup; + }; +} OptixProgramGroupDesc; + +/// Program group options +/// +/// \see #optixProgramGroupCreate() +typedef struct OptixProgramGroupOptions +{ + /// Specifies the payload type of this program group. + /// All programs in the group must support the payload type + /// (Program support for a type is specified by calling + /// \see #optixSetPayloadTypes or otherwise all types specified in + /// \see #OptixModuleCompileOptions are supported). + /// If a program is not available for the requested payload type, + /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH. + /// If the payloadType is left zero, a unique type is deduced. + /// The payload type can be uniquely deduced if there is exactly one payload type + /// for which all programs in the group are available. + /// If the payload type could not be deduced uniquely + /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED. + OptixPayloadType* payloadType; +} OptixProgramGroupOptions; + +/// The following values are used to indicate which exception was thrown. +typedef enum OptixExceptionCodes +{ + /// Stack overflow of the continuation stack. + /// no exception details. + OPTIX_EXCEPTION_CODE_STACK_OVERFLOW = -1, + + /// The trace depth is exceeded. + /// no exception details. + OPTIX_EXCEPTION_CODE_TRACE_DEPTH_EXCEEDED = -2, + + /// The traversal depth is exceeded. 
+ /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + OPTIX_EXCEPTION_CODE_TRAVERSAL_DEPTH_EXCEEDED = -3, + + /// Traversal encountered an invalid traversable type. + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidTraversable() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE = -5, + + /// The miss SBT record index is out of bounds. + /// A miss SBT record index is valid within the range [0, OptixShaderBindingTable::missRecordCount) (See optixLaunch) + /// Exception details: + /// optixGetExceptionInvalidSbtOffset() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT = -6, + + /// The traversal hit SBT record index is out of bounds. + /// + /// A traversal hit SBT record index is valid within the range [0, OptixShaderBindingTable::hitgroupRecordCount) (See optixLaunch) + /// The following formula relates the + /// sbt-index (See optixGetExceptionInvalidSbtOffset), + /// sbt-instance-offset (See OptixInstance::sbtOffset), + /// sbt-geometry-acceleration-structure-index (See optixGetSbtGASIndex), + /// sbt-stride-from-trace-call and sbt-offset-from-trace-call (See optixTrace) + /// + /// sbt-index = sbt-instance-offset + (sbt-geometry-acceleration-structure-index * sbt-stride-from-trace-call) + sbt-offset-from-trace-call + /// + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidSbtOffset() + /// optixGetSbtGASIndex() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT = -7, + + /// The shader encountered an unsupported primitive type (See OptixPipelineCompileOptions::usesPrimitiveTypeFlags). + /// no exception details. + OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE = -8, + + /// The shader encountered a call to optixTrace with at least + /// one of the float arguments being inf or nan, or the tmin argument is negative. 
+ /// Exception details: + /// optixGetExceptionInvalidRay() + OPTIX_EXCEPTION_CODE_INVALID_RAY = -9, + + /// The shader encountered a call to either optixDirectCall or optixContinuationCall + /// where the argument count does not match the parameter count of the callable + /// program which is called. + /// Exception details: + /// optixGetExceptionParameterMismatch() + OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH = -10, + + /// The invoked builtin IS does not match the current GAS + OPTIX_EXCEPTION_CODE_BUILTIN_IS_MISMATCH = -11, + + /// Tried to call a callable program using an SBT offset that is larger + /// than the number of passed in callable SBT records. + /// Exception details: + /// optixGetExceptionInvalidSbtOffset() + OPTIX_EXCEPTION_CODE_CALLABLE_INVALID_SBT = -12, + + /// Tried to call a direct callable using an SBT offset of a record that + /// was built from a program group that did not include a direct callable. + OPTIX_EXCEPTION_CODE_CALLABLE_NO_DC_SBT_RECORD = -13, + + /// Tried to call a continuation callable using an SBT offset of a record + /// that was built from a program group that did not include a continuation callable. + OPTIX_EXCEPTION_CODE_CALLABLE_NO_CC_SBT_RECORD = -14, + + /// Tried to directly traverse a single GAS while single GAS traversable graphs are not enabled + /// (see OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS). + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidTraversable() + OPTIX_EXCEPTION_CODE_UNSUPPORTED_SINGLE_LEVEL_GAS = -15, + + /// An argument passed to an optix call is + /// not within an acceptable range of values. + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0 = -16, + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_1 = -17, + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_2 = -18, + + /// Tried to access data on an AS without random data access support (See OptixBuildFlags). 
+ OPTIX_EXCEPTION_CODE_UNSUPPORTED_DATA_ACCESS = -32, + + /// The program payload type doesn't match the trace payload type. + OPTIX_EXCEPTION_CODE_PAYLOAD_TYPE_MISMATCH = -33, +} OptixExceptionCodes; + +/// Exception flags. +/// +/// \see #OptixPipelineCompileOptions::exceptionFlags, #OptixExceptionCodes +typedef enum OptixExceptionFlags +{ + /// No exceptions are enabled. + OPTIX_EXCEPTION_FLAG_NONE = 0, + + /// Enables exception checks related to the continuation stack. + OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW = 1u << 0, + + /// Enables exception checks related to trace depth. + OPTIX_EXCEPTION_FLAG_TRACE_DEPTH = 1u << 1, + + /// Enables user exceptions via optixThrowException(). This flag must be specified for all modules in a pipeline + /// if any module calls optixThrowException(). + OPTIX_EXCEPTION_FLAG_USER = 1u << 2, + + /// Enables various exception checks related to traversal. + OPTIX_EXCEPTION_FLAG_DEBUG = 1u << 3 +} OptixExceptionFlags; + +/// Compilation options for all modules of a pipeline. +/// +/// Similar to #OptixModuleCompileOptions, but these options here need to be equal for all modules of a pipeline. +/// +/// \see #optixModuleCreate(), #optixPipelineCreate() +typedef struct OptixPipelineCompileOptions +{ + /// Boolean value indicating whether motion blur could be used + int usesMotionBlur; + + /// Traversable graph bitfield. See OptixTraversableGraphFlags + unsigned int traversableGraphFlags; + + /// How much storage, in 32b words, to make available for the payload, [0..32] + /// Must be zero if numPayloadTypes is not zero. + int numPayloadValues; + + /// How much storage, in 32b words, to make available for the attributes. The + /// minimum number is 2. Values below that will automatically be changed to 2. [2..8] + int numAttributeValues; + + /// A bitmask of OptixExceptionFlags indicating which exceptions are enabled. + unsigned int exceptionFlags; + + /// The name of the pipeline parameter variable. 
If 0, no pipeline parameter + /// will be available. This will be ignored if the launch param variable was + /// optimized out or was not found in the modules linked to the pipeline. + const char* pipelineLaunchParamsVariableName; + + /// Bit field enabling primitive types. See OptixPrimitiveTypeFlags. + /// Setting to zero corresponds to enabling OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM and OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE. + unsigned int usesPrimitiveTypeFlags; + + /// Boolean value indicating whether opacity micromaps could be used + int allowOpacityMicromaps; +} OptixPipelineCompileOptions; + +/// Link options for a pipeline +/// +/// \see #optixPipelineCreate() +typedef struct OptixPipelineLinkOptions +{ + /// Maximum trace recursion depth. 0 means a ray generation program can be + /// launched, but can't trace any rays. The maximum allowed value is 31. + unsigned int maxTraceDepth; + +} OptixPipelineLinkOptions; + +/// Describes the shader binding table (SBT) +/// +/// \see #optixLaunch() +typedef struct OptixShaderBindingTable +{ + /// Device address of the SBT record of the ray gen program to start launch at. The address must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + CUdeviceptr raygenRecord; + + /// Device address of the SBT record of the exception program. The address must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + CUdeviceptr exceptionRecord; + + /// Arrays of SBT records for miss programs. The base address and the stride must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr missRecordBase; + unsigned int missRecordStrideInBytes; + unsigned int missRecordCount; + /// @} + + /// Arrays of SBT records for hit groups. The base address and the stride must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr hitgroupRecordBase; + unsigned int hitgroupRecordStrideInBytes; + unsigned int hitgroupRecordCount; + /// @} + + /// Arrays of SBT records for callable programs. 
If the base address is not null, the stride and count must not be + /// zero. If the base address is null, then the count needs to be zero. The base address and the stride must be a + /// multiple of OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr callablesRecordBase; + unsigned int callablesRecordStrideInBytes; + unsigned int callablesRecordCount; + /// @} + +} OptixShaderBindingTable; + +/// Describes the stack size requirements of a program group. +/// +/// \see #optixProgramGroupGetStackSize() +typedef struct OptixStackSizes +{ + /// Continuation stack size of RG programs in bytes + unsigned int cssRG; + /// Continuation stack size of MS programs in bytes + unsigned int cssMS; + /// Continuation stack size of CH programs in bytes + unsigned int cssCH; + /// Continuation stack size of AH programs in bytes + unsigned int cssAH; + /// Continuation stack size of IS programs in bytes + unsigned int cssIS; + /// Continuation stack size of CC programs in bytes + unsigned int cssCC; + /// Direct stack size of DC programs in bytes + unsigned int dssDC; + +} OptixStackSizes; + +/// Options that can be passed to \c optixQueryFunctionTable() +typedef enum OptixQueryFunctionTableOptions +{ + /// Placeholder (there are no options yet) + OPTIX_QUERY_FUNCTION_TABLE_OPTION_DUMMY = 0 + +} OptixQueryFunctionTableOptions; + +/// Type of the function \c optixQueryFunctionTable() +typedef OptixResult( OptixQueryFunctionTable_t )( int abiId, + unsigned int numOptions, + OptixQueryFunctionTableOptions* /*optionKeys*/, + const void** /*optionValues*/, + void* functionTable, + size_t sizeOfTable ); + +/// Specifies the options for retrieving an intersection program for a built-in primitive type. +/// The primitive type must not be OPTIX_PRIMITIVE_TYPE_CUSTOM. +/// +/// \see #optixBuiltinISModuleGet() +typedef struct OptixBuiltinISOptions +{ + OptixPrimitiveType builtinISModuleType; + /// Boolean value indicating whether vertex motion blur is used (but not motion transform blur). 
+ int usesMotionBlur; + /// Build flags, see OptixBuildFlags. + unsigned int buildFlags; + /// End cap properties of curves, see OptixCurveEndcapFlags, 0 for non-curve types. + unsigned int curveEndcapFlags; +} OptixBuiltinISOptions; + +#if defined( __CUDACC__ ) +/// Describes the ray that was passed into \c optixTrace() which caused an exception with +/// exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. +/// +/// \see #optixGetExceptionInvalidRay() +typedef struct OptixInvalidRayExceptionDetails +{ + float3 origin; + float3 direction; + float tmin; + float tmax; + float time; +} OptixInvalidRayExceptionDetails; + +/// Describes the details of a call to a callable program which caused an exception with +/// exception code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH. +/// Note that OptiX packs the parameters into individual 32 bit values, so the number of +/// expected and passed values may not correspond to the number of arguments passed into +/// optixDirectCall or optixContinuationCall, or the number of parameters in the definition +/// of the function that is called. 
+typedef struct OptixParameterMismatchExceptionDetails +{ + /// Number of 32 bit values expected by the callable program + unsigned int expectedParameterCount; + /// Number of 32 bit values that were passed to the callable program + unsigned int passedArgumentCount; + /// The offset of the SBT entry of the callable program relative to OptixShaderBindingTable::callablesRecordBase + unsigned int sbtIndex; + /// Pointer to a string that holds the name of the callable program that was called + char* callableName; +} OptixParameterMismatchExceptionDetails; #endif -// clang-format on -#endif // #ifndef __optix_optix_types_h__ + +/**@}*/ // end group optix_types + +#endif // OPTIX_OPTIX_TYPES_H From 9ac78314a30cbd067fc2d4afd028be5a1f6d6dba Mon Sep 17 00:00:00 2001 From: iaomw Date: Fri, 8 Dec 2023 20:00:16 +0800 Subject: [PATCH 2/4] Updates for 7.7 --- zenovis/xinxinoptix/CMakeLists.txt | 10 +- zenovis/xinxinoptix/OptiXStuff.h | 31 ++--- zenovis/xinxinoptix/SDK/sutil/CMakeLists.txt | 123 ------------------- zenovis/xinxinoptix/SDK/sutil/sutil.cpp | 20 +-- zenovis/xinxinoptix/optixPathTracer.cpp | 2 +- zenovis/xinxinoptix/raiicuda.h | 1 - zenovis/xinxinoptix/volume/optixVolume.cpp | 3 - zenovis/xinxinoptix/volume/optixVolume.h | 2 + 8 files changed, 34 insertions(+), 158 deletions(-) delete mode 100644 zenovis/xinxinoptix/SDK/sutil/CMakeLists.txt diff --git a/zenovis/xinxinoptix/CMakeLists.txt b/zenovis/xinxinoptix/CMakeLists.txt index 00e2fbd354..d0fa96bac4 100644 --- a/zenovis/xinxinoptix/CMakeLists.txt +++ b/zenovis/xinxinoptix/CMakeLists.txt @@ -81,12 +81,12 @@ set(CUDA_PATH ${CUDAToolkit_BIN_DIR}/..) 
set(FILE_LIST ${OPTIX_PATH}/include/@optix.h + ${OPTIX_PATH}/include/@optix_types.h + ${OPTIX_PATH}/include/@optix_host.h ${OPTIX_PATH}/include/@optix_device.h - ${OPTIX_PATH}/include/@optix_7_device.h - ${OPTIX_PATH}/include/@optix_7_types.h - ${OPTIX_PATH}/include/@internal/optix_7_device_impl.h - ${OPTIX_PATH}/include/@internal/optix_7_device_impl_exception.h - ${OPTIX_PATH}/include/@internal/optix_7_device_impl_transformations.h + ${OPTIX_PATH}/include/@internal/optix_device_impl.h + ${OPTIX_PATH}/include/@internal/optix_device_impl_exception.h + ${OPTIX_PATH}/include/@internal/optix_device_impl_transformations.h ${CUDA_PATH}/include/@cuda_fp16.h ${CUDA_PATH}/include/@cuda_fp16.hpp diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 03178e43ab..c6b4652588 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -24,6 +23,7 @@ #include #include #include "optixVolume.h" +#include "optix_types.h" #include "raiicuda.h" #include "zeno/types/TextureObject.h" #include "zeno/utils/log.h" @@ -191,7 +191,7 @@ inline bool createModule(OptixModule &m, OptixDeviceContext &context, const char "-lineinfo", //"-G"//"--dopt=on", #endif //"--gpu-architecture=compute_60", - //"--relocatable-device-code=true" + "--relocatable-device-code=true" //"--extensible-whole-program" }; @@ -205,13 +205,13 @@ inline bool createModule(OptixModule &m, OptixDeviceContext &context, const char if (_c_group == nullptr) { OPTIX_CHECK( - optixModuleCreateFromPTX(context, &module_compile_options, &pipeline_compile_options, input, inputSize, log, &sizeof_log, &m) + optixModuleCreate( context, &module_compile_options, &pipeline_compile_options, input, inputSize, log, &sizeof_log, &m ) ); } else { OptixTask firstTask; OPTIX_CHECK( - optixModuleCreateFromPTXWithTasks( + optixModuleCreateWithTasks( context, &module_compile_options, &pipeline_compile_options, @@ -890,11 +890,12 @@ 
inline void createPipeline() { OptixPipelineLinkOptions pipeline_link_options = {}; pipeline_link_options.maxTraceDepth = 2; -#if defined( NDEBUG ) - pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL; -#else - pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; -#endif + +// #if defined( NDEBUG ) +// pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL; +// #else +// pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; +// #endif size_t num_progs = 3 + rtMaterialShaders.size() * 2; OptixProgramGroup* program_groups = new OptixProgramGroup[num_progs]; @@ -927,13 +928,13 @@ inline void createPipeline() isPipelineCreated = true; OptixStackSizes stack_sizes = {}; - OPTIX_CHECK( optixUtilAccumulateStackSizes( raygen_prog_group, &stack_sizes ) ); - OPTIX_CHECK( optixUtilAccumulateStackSizes( radiance_miss_group, &stack_sizes ) ); - OPTIX_CHECK( optixUtilAccumulateStackSizes( occlusion_miss_group, &stack_sizes ) ); + OPTIX_CHECK( optixUtilAccumulateStackSizes( raygen_prog_group, &stack_sizes, pipeline ) ); + OPTIX_CHECK( optixUtilAccumulateStackSizes( radiance_miss_group, &stack_sizes, pipeline ) ); + OPTIX_CHECK( optixUtilAccumulateStackSizes( occlusion_miss_group, &stack_sizes, pipeline ) ); for(int i=0;i #include -#include #include +#include #include diff --git a/zenovis/xinxinoptix/raiicuda.h b/zenovis/xinxinoptix/raiicuda.h index 60b6ef9235..69a7f26c47 100644 --- a/zenovis/xinxinoptix/raiicuda.h +++ b/zenovis/xinxinoptix/raiicuda.h @@ -4,7 +4,6 @@ #include #include -#include #include #include diff --git a/zenovis/xinxinoptix/volume/optixVolume.cpp b/zenovis/xinxinoptix/volume/optixVolume.cpp index 21151934df..f1f2703395 100644 --- a/zenovis/xinxinoptix/volume/optixVolume.cpp +++ b/zenovis/xinxinoptix/volume/optixVolume.cpp @@ -6,9 +6,6 @@ #include #include -#include -#include - // ---------------------------------------------------------------------------- // Functions for manipulating Volume instances 
// ---------------------------------------------------------------------------- diff --git a/zenovis/xinxinoptix/volume/optixVolume.h b/zenovis/xinxinoptix/volume/optixVolume.h index 6957f12e10..f438bc6a44 100644 --- a/zenovis/xinxinoptix/volume/optixVolume.h +++ b/zenovis/xinxinoptix/volume/optixVolume.h @@ -1,5 +1,7 @@ #pragma once + #include +#include #include #include From 037fa289f496226d9a9dd6c74486a220a48662fd Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 25 Dec 2023 20:13:09 +0800 Subject: [PATCH 3/4] Optix callable --- zeno/include/zeno/types/MaterialObject.h | 18 ++ zeno/src/nodes/mtl/ShaderFinalize.cpp | 35 ++- zenovis/src/optx/RenderEngineOptx.cpp | 106 +++---- zenovis/xinxinoptix/CMakeLists.txt | 2 + zenovis/xinxinoptix/CallableDefault.cu | 189 ++++++++++++ zenovis/xinxinoptix/CallableVolume.cu | 304 +++++++++++++++++++ zenovis/xinxinoptix/DeflMatShader.cu | 155 +--------- zenovis/xinxinoptix/DisneyBSDF.h | 17 +- zenovis/xinxinoptix/IOMat.h | 3 +- zenovis/xinxinoptix/Light.cu | 9 - zenovis/xinxinoptix/OptiXStuff.h | 230 ++++++++------ zenovis/xinxinoptix/SDK/sutil/sutil.cpp | 16 +- zenovis/xinxinoptix/SDK/sutil/sutil.h | 9 +- zenovis/xinxinoptix/optixPathTracer.cpp | 195 ++++++++---- zenovis/xinxinoptix/optixPathTracer.h | 3 + zenovis/xinxinoptix/volume.cu | 369 +---------------------- zenovis/xinxinoptix/volume.h | 19 ++ zenovis/xinxinoptix/xinxinoptixapi.h | 8 +- 18 files changed, 929 insertions(+), 758 deletions(-) create mode 100644 zenovis/xinxinoptix/CallableDefault.cu create mode 100644 zenovis/xinxinoptix/CallableVolume.cu diff --git a/zeno/include/zeno/types/MaterialObject.h b/zeno/include/zeno/types/MaterialObject.h index 6b4726cc2d..4bd300fa71 100644 --- a/zeno/include/zeno/types/MaterialObject.h +++ b/zeno/include/zeno/types/MaterialObject.h @@ -20,6 +20,8 @@ namespace zeno std::vector> tex2Ds; std::vector> tex3Ds; std::string transform; + + std::string parameters; // json std::string mtlidkey; // unused for now size_t serializeSize() 
const @@ -30,6 +32,10 @@ namespace zeno size += sizeof(mtlidkeyLen); size += mtlidkeyLen; + auto paramLen {parameters.size()}; + size += sizeof(paramLen); + size += paramLen; + auto vertLen{vert.size()}; size += sizeof(vertLen); size += vertLen; @@ -83,6 +89,12 @@ namespace zeno mtlidkey.copy(str + i, mtlidkeyLen); i += mtlidkeyLen; + auto paramLen{parameters.size()}; + memcpy(str+i, ¶mLen, sizeof(paramLen)); + i += sizeof(paramLen); + parameters.copy(str+i, paramLen); + i += paramLen; + auto vertLen{vert.size()}; memcpy(str + i, &vertLen, sizeof(vertLen)); i += sizeof(vertLen); @@ -167,6 +179,12 @@ namespace zeno this->mtlidkey = std::string{str + i, mtlidkeyLen}; i += mtlidkeyLen; + size_t paramLen; + memcpy(¶mLen, str+i, sizeof(paramLen)); + i += sizeof(paramLen); + this->parameters = std::string(str+i, paramLen); + i += paramLen; + size_t vertLen; memcpy(&vertLen, str + i, sizeof(vertLen)); i += sizeof(vertLen); diff --git a/zeno/src/nodes/mtl/ShaderFinalize.cpp b/zeno/src/nodes/mtl/ShaderFinalize.cpp index 3157ebed11..e718d830a4 100644 --- a/zeno/src/nodes/mtl/ShaderFinalize.cpp +++ b/zeno/src/nodes/mtl/ShaderFinalize.cpp @@ -12,6 +12,12 @@ #include #include #include "magic_enum.hpp" +#include "zeno/utils/vec.h" + +#include +#include +#include +#include namespace zeno { @@ -141,9 +147,9 @@ struct ShaderFinalize : INode { auto sssRadiusMethod = get_input2("sssRadius"); if (sssRadiusMethod == "Fixed") { - commonCode += "#define _SSS_FIXED_RADIUS_ 1 \n"; + code += "bool sssFxiedRadius = true;\n"; } else { - commonCode += "#define _SSS_FIXED_RADIUS_ 0 \n"; + code += "bool sssFxiedRadius = false;\n"; } auto mtl = std::make_shared(); @@ -180,9 +186,28 @@ struct ShaderFinalize : INode { commonCode += "#define VolumeEmissionScaler VolumeEmissionScalerType::" + VolumeEmissionScaler + "\n"; vol_depth = clamp(vol_depth, 9, 99); - - commonCode += "static const int _vol_depth = " + std::to_string(vol_depth) + ";\n"; - commonCode += "static const float _vol_extinction = " + 
std::to_string(vol_extinction) + ";\n"; + vol_extinction = clamp(vol_extinction, 1e-5, 1e+5); + + std::string parameters = ""; + { + using namespace rapidjson; + Document d; d.SetObject(); + auto& allocator = d.GetAllocator(); + + Value s = Value(); + s.SetInt(vol_depth); + d.AddMember("vol_depth", s, allocator); + + s = Value(); + s.SetFloat(vol_extinction); + d.AddMember("vol_extinction", s, allocator); + + StringBuffer buffer; + Writer writer(buffer); + d.Accept(writer); + parameters = buffer.GetString(); + } + mtl->parameters = parameters; auto tex3dList = get_input("tex3dList")->getRaw(); //get(); diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index f15530d18f..23e8e9c39d 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -70,10 +70,12 @@ struct GraphicsManager { std::vector> tex3Ds; std::string common; std::string shader; - std::string mtlidkey; std::string extensions; + std::string mtlidkey; std::string transform; + std::string parameters; }; + struct DetPrimitive { std::shared_ptr primSp; }; @@ -469,7 +471,7 @@ struct GraphicsManager { } else if (auto mtl = dynamic_cast(obj)) { - det = DetMaterial{mtl->tex2Ds, mtl->tex3Ds, mtl->common, mtl->frag, mtl->mtlidkey, mtl->extensions, mtl->transform}; + det = DetMaterial{mtl->tex2Ds, mtl->tex3Ds, mtl->common, mtl->frag, mtl->extensions, mtl->mtlidkey, mtl->transform, mtl->parameters}; } } @@ -874,17 +876,15 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { std::pair shadtpl2; }; - ShaderTemplateInfo _fallback_shader_template { - "DefaultFallback.cu", false, {}, {}, {} + ShaderTemplateInfo _default_callable_template { + "CallableDefault.cu", false, {}, {}, {} + }; + ShaderTemplateInfo _volume_callable_template { + "CallableVolume.cu", false, {}, {}, {} }; - void ensure_fallback() { - _fallback_shader_template.shadtmpl = sutil::lookupIncFile(_fallback_shader_template.name.c_str()); - } - ShaderTemplateInfo 
_default_shader_template { "DeflMatShader.cu", false, {}, {}, {} }; - ShaderTemplateInfo _volume_shader_template { "volume.cu", false, {}, {}, {} }; @@ -928,6 +928,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { _template.commontpl = tplsv.substr(0, pcommon); } else{ + return; throw std::runtime_error("cannot find stub COMMON_CODE in shader template"); } std::string_view tmplstub0 = "//GENERATED_BEGIN_MARK"; @@ -1006,30 +1007,29 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { if (meshNeedUpdate || matNeedUpdate || staticNeedUpdate) { //zeno::log_debug("[zeno-optix] updating scene"); //zeno::log_debug("[zeno-optix] updating material"); - std::vector> _mesh_shader_list{}; + std::vector> _meshes_shader_list{}; std::vector> _sphere_shader_list{}; std::vector> _volume_shader_list{}; std::map meshMatLUT{}; std::map matIDtoShaderIndex{}; + ensure_shadtmpl(_default_callable_template); + ensure_shadtmpl(_volume_callable_template); + ensure_shadtmpl(_default_shader_template); ensure_shadtmpl(_volume_shader_template); ensure_shadtmpl(_light_shader_template); - ensure_fallback(); - - auto _default_shader_fallback = std::make_shared(_fallback_shader_template.shadtmpl); - auto _volume_shader_fallback = std::make_shared(_volume_shader_template.shadtmpl); { auto tmp = std::make_shared(); tmp->mark = ShaderMaker::Mesh; tmp->matid = "Default"; - tmp->source = _default_shader_template.shadtmpl; - tmp->fallback = _default_shader_fallback; + tmp->filename = _default_shader_template.name; + tmp->callable = _default_callable_template.shadtmpl; - _mesh_shader_list.push_back(tmp); + _meshes_shader_list.push_back(tmp); } { @@ -1037,8 +1037,8 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { tmp->mark = ShaderMaker::Sphere; tmp->matid = "Default"; - tmp->source = _default_shader_template.shadtmpl; - tmp->fallback = _default_shader_fallback; + tmp->filename = _default_shader_template.name; + tmp->callable = _default_callable_template.shadtmpl; 
_sphere_shader_list.push_back(tmp); } @@ -1111,39 +1111,26 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { } } - const auto& selected_template = has_vdb? _volume_shader_template : _default_shader_template; - const auto& selected_fallback = has_vdb? _volume_shader_fallback : _default_shader_fallback; + const auto& selected_source = has_vdb? _volume_shader_template : _default_shader_template; + const auto& selected_callable = has_vdb? _volume_callable_template : _default_callable_template; - std::string shader; + std::string callable; auto common_code = mtldet->common; - std::string tar = "uniform sampler2D"; - size_t index = 0; - while (true) { - /* Locate the substring to replace. */ - index = common_code.find(tar, index); - if (index == std::string::npos) break; - - /* Make the replacement. */ - common_code.replace(index, tar.length(), "//////////"); - - /* Advance index forward so the next iteration doesn't pick it up as well. */ - index += tar.length(); - } - auto& commontpl = selected_template.commontpl; - auto& shadtpl2 = selected_template.shadtpl2; + auto& commontpl = selected_callable.commontpl; + auto& shadtpl2 = selected_callable.shadtpl2; - shader.reserve(commontpl.size() + callable.reserve(commontpl.size() + common_code.size() + shadtpl2.first.size() + mtldet->shader.size() + shadtpl2.second.size()); - shader.append(commontpl); - shader.append(common_code); - shader.append(shadtpl2.first); - shader.append(mtldet->shader); - shader.append(shadtpl2.second); - //std::cout<shader); + callable.append(shadtpl2.second); + //std::cout< shaderTex; int texid=0; @@ -1155,10 +1142,13 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { } ShaderPrepared shaderP; + + shaderP.callable = callable; + shaderP.filename = selected_source.name; + shaderP.parameters = mtldet->parameters; + shaderP.matid = mtldet->mtlidkey; - shaderP.source = shader; shaderP.tex_names = shaderTex; - shaderP.fallback = selected_fallback; if (has_vdb) { @@ -1167,10 
+1157,10 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { } else { if (cachedMeshesMaterials.count(mtldet->mtlidkey) > 0) { - meshMatLUT.insert({mtldet->mtlidkey, (int)_mesh_shader_list.size()}); + meshMatLUT.insert({mtldet->mtlidkey, (int)_meshes_shader_list.size()}); shaderP.mark = ShaderMaker::Mesh; - _mesh_shader_list.push_back(std::make_shared(shaderP)); + _meshes_shader_list.push_back(std::make_shared(shaderP)); } if (cachedSphereMaterials.count(mtldet->mtlidkey) > 0) { @@ -1184,34 +1174,34 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { { auto tmp = std::make_shared(); + tmp->filename = _light_shader_template.name; + tmp->callable = _default_callable_template.shadtmpl; tmp->mark = ShaderMaker::Mesh; tmp->matid = "Light"; - tmp->source = _light_shader_template.shadtmpl; - tmp->fallback = _default_shader_fallback; - _mesh_shader_list.push_back(tmp); + _meshes_shader_list.push_back(tmp); } { auto tmp = std::make_shared(); + tmp->filename = _light_shader_template.name; + tmp->callable = _default_callable_template.shadtmpl; tmp->mark = ShaderMaker::Sphere; tmp->matid = "Light"; - tmp->source = _light_shader_template.shadtmpl; - tmp->fallback = _default_shader_fallback; - + _sphere_shader_list.push_back(tmp); } std::vector> allShaders{}; - allShaders.reserve(_mesh_shader_list.size()+_sphere_shader_list.size()+_volume_shader_list.size()); + allShaders.reserve(_meshes_shader_list.size()+_sphere_shader_list.size()+_volume_shader_list.size()); - allShaders.insert(allShaders.end(), _mesh_shader_list.begin(), _mesh_shader_list.end()); + allShaders.insert(allShaders.end(), _meshes_shader_list.begin(), _meshes_shader_list.end()); allShaders.insert(allShaders.end(), _sphere_shader_list.begin(), _sphere_shader_list.end()); allShaders.insert(allShaders.end(), _volume_shader_list.begin(), _volume_shader_list.end()); - const size_t sphere_shader_offset = _mesh_shader_list.size(); - const size_t volume_shader_offset = _mesh_shader_list.size() + 
_sphere_shader_list.size(); + const size_t sphere_shader_offset = _meshes_shader_list.size(); + const size_t volume_shader_offset = _meshes_shader_list.size() + _sphere_shader_list.size(); for (uint i=0; i +#include +#include +#include + +#include "TraceStuff.h" +#include "zxxglslvec.h" +#include "IOMat.h" + +//COMMON_CODE + +extern "C" __device__ MatOutput __direct_callable__evalmat(cudaTextureObject_t zenotex[], float4* uniforms, const MatInput& attrs) { + + /* MODMA */ + auto att_pos = attrs.pos; + auto att_clr = attrs.clr; + auto att_uv = attrs.uv; + auto att_nrm = attrs.nrm; + auto att_tang = attrs.tang; + auto att_instPos = attrs.instPos; + auto att_instNrm = attrs.instNrm; + auto att_instUv = attrs.instUv; + auto att_instClr = attrs.instClr; + auto att_instTang = attrs.instTang; + auto att_NoL = attrs.NoL; + auto att_LoV = attrs.LoV; + auto att_N = attrs.N; + auto att_T = attrs.T; + auto att_L = attrs.L; + auto att_V = attrs.V; + auto att_H = attrs.H; + auto att_reflectance = attrs.reflectance; + auto att_fresnel = attrs.fresnel; + +#ifndef _FALLBACK_ + + /** generated code here beg **/ + //GENERATED_BEGIN_MARK + /* MODME */ + float mat_base = 1.0f; + vec3 mat_basecolor = vec3(1.0f, 1.0f, 1.0f); + float mat_roughness = 0.5f; + float mat_metallic = 0.0f; + vec3 mat_metalColor = vec3(1.0f,1.0f,1.0f); + float mat_specular = 0.0f; + float mat_specularTint = 0.0f; + float mat_anisotropic = 0.0f; + float mat_anisoRotation = 0.0f; + + float mat_subsurface = 0.0f; + vec3 mat_sssParam = vec3(0.0f,0.0f,0.0f); + vec3 mat_sssColor = vec3(0.0f,0.0f,0.0f); + float mat_scatterDistance = 0.0f; + float mat_scatterStep = 0.0f; + + float mat_sheen = 0.0f; + float mat_sheenTint = 0.0f; + + float mat_clearcoat = 0.0f; + vec3 mat_clearcoatColor = vec3(1.0f,1.0f,1.0f); + float mat_clearcoatRoughness = 0.0f; + float mat_clearcoatIOR = 1.5f; + float mat_opacity = 0.0f; + + float mat_specTrans = 0.0f; + vec3 mat_transColor = vec3(1.0f,1.0f,1.0f); + vec3 mat_transTint = 
vec3(1.0f,1.0f,1.0f); + float mat_transTintDepth = 0.0f; + float mat_transDistance = 0.0f; + vec3 mat_transScatterColor = vec3(1.0f,1.0f,1.0f); + float mat_ior = 1.0f; + + float mat_flatness = 0.0f; + float mat_thin = 0.0f; + float mat_doubleSide= 0.0f; + float mat_smoothness = 0.0f; + vec3 mat_normal = vec3(0.0f, 0.0f, 1.0f); + float mat_emissionIntensity = float(0); + vec3 mat_emission = vec3(1.0f, 1.0f, 1.0f); + float mat_displacement = 0.0f; + float mat_shadowReceiver = 0.0f; + float mat_NoL = 1.0f; + float mat_LoV = 1.0f; + vec3 mat_reflectance = att_reflectance; + + bool sssFxiedRadius = false; + + //GENERATED_END_MARK + /** generated code here end **/ + +#else + + float mat_base = 1.0f; + vec3 mat_basecolor = vec3(1.0f, 1.0f, 1.0f); + float mat_roughness = 0.5f; + float mat_metallic = 0.0f; + vec3 mat_metalColor = vec3(1.0f,1.0f,1.0f); + float mat_specular = 0.0f; + float mat_specularTint = 0.0f; + float mat_anisotropic = 0.0f; + float mat_anisoRotation = 0.0f; + + float mat_subsurface = 0.0f; + vec3 mat_sssParam = vec3(0.0f,0.0f,0.0f); + vec3 mat_sssColor = vec3(0.0f,0.0f,0.0f); + float mat_scatterDistance = 0.0f; + float mat_scatterStep = 0.0f; + + float mat_sheen = 0.0f; + float mat_sheenTint = 0.0f; + + float mat_clearcoat = 0.0f; + vec3 mat_clearcoatColor = vec3(1.0f,1.0f,1.0f); + float mat_clearcoatRoughness = 0.0f; + float mat_clearcoatIOR = 1.5f; + float mat_opacity = 0.0f; + + float mat_specTrans = 0.0f; + vec3 mat_transColor = vec3(1.0f,1.0f,1.0f); + vec3 mat_transTint = vec3(1.0f,1.0f,1.0f); + float mat_transTintDepth = 0.0f; + float mat_transDistance = 0.0f; + vec3 mat_transScatterColor = vec3(1.0f,1.0f,1.0f); + float mat_ior = 1.0f; + + float mat_flatness = 0.0f; + float mat_thin = 0.0f; + float mat_doubleSide= 0.0f; + float mat_smoothness = 0.0f; + vec3 mat_normal = vec3(0.0f, 0.0f, 1.0f); + float mat_emissionIntensity = float(0); + vec3 mat_emission = vec3(1.0f, 1.0f, 1.0f); + float mat_displacement = 0.0f; + float mat_shadowReceiver = 0.0f; + 
float mat_NoL = 1.0f; + float mat_LoV = 1.0f; + vec3 mat_reflectance = att_reflectance; + + bool sssFxiedRadius = false; + +#endif // _FALLBACK_ + + MatOutput mats; + /* MODME */ + mats.basecolor = mat_base * mat_basecolor; + mats.roughness = clamp(mat_roughness, 0.01, 0.99); + mats.metallic = clamp(mat_metallic, 0.0f, 1.0f); + mats.metalColor = mat_metalColor; + mats.specular = mat_specular; + mats.specularTint = mat_specularTint; + mats.anisotropic = clamp(mat_anisotropic, 0.0f, 1.0f); + mats.anisoRotation = clamp(mat_anisoRotation, 0.0f, 1.0f); + + mats.subsurface = mat_subsurface; + mats.sssColor = mat_sssColor; + mats.sssParam = mat_sssParam; + mats.scatterDistance = max(0.0f,mat_scatterDistance); + mats.scatterStep = clamp(mat_scatterStep,0.0f,1.0f); + + mats.sheen = mat_sheen; + mats.sheenTint = mat_sheenTint; + + mats.clearcoat = clamp(mat_clearcoat, 0.0f, 1.0f); + mats.clearcoatColor = mat_clearcoatColor; + mats.clearcoatRoughness = clamp(mat_clearcoatRoughness, 0.01, 0.99); + mats.clearcoatIOR = mat_clearcoatIOR; + + mats.specTrans = clamp(mat_specTrans, 0.0f, 1.0f); + mats.transColor = mat_transColor; + mats.transTint = mat_transTint; + mats.transTintDepth = max(0.0f,mat_transTintDepth); + mats.transDistance = max(mat_transDistance,0.1f); + mats.transScatterColor = mat_transScatterColor; + mats.ior = max(0.0f,mat_ior); + + mats.opacity = mat_opacity; + mats.nrm = mat_normal; + mats.emission = mat_emissionIntensity * mat_emission; + + mats.flatness = mat_flatness; + mats.thin = mat_thin; + mats.doubleSide = mat_doubleSide; + mats.shadowReceiver = mat_shadowReceiver; + + mats.sssFxiedRadius = sssFxiedRadius; + mats.smoothness = mat_smoothness; + + return mats; +} \ No newline at end of file diff --git a/zenovis/xinxinoptix/CallableVolume.cu b/zenovis/xinxinoptix/CallableVolume.cu new file mode 100644 index 0000000000..4c300af7d6 --- /dev/null +++ b/zenovis/xinxinoptix/CallableVolume.cu @@ -0,0 +1,304 @@ +#include "volume.h" + +#include "TraceStuff.h" 
+#include "zxxglslvec.h" +#include "math_constants.h" + +// #include +// #include "nvfunctional" +#include +#include +#include +#include + +enum struct VolumeEmissionScalerType { + Raw, Density, Absorption +}; + +//PLACEHOLDER +using DataTypeNVDB0 = nanovdb::Fp32; +using GridTypeNVDB0 = nanovdb::NanoGrid; +#define VolumeEmissionScaler VolumeEmissionScalerType::Raw +//PLACEHOLDER + +#define _USING_NANOVDB_ true + +//COMMON_CODE + +/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ +inline __device__ float cubic_w0(float a) +{ + return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); +} +inline __device__ float cubic_w1(float a) +{ + return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); +} +inline __device__ float cubic_w2(float a) +{ + return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); +} +inline __device__ float cubic_w3(float a) +{ + return (1.0f / 6.0f) * (a * a * a); +} + +/* g0 and g1 are the two amplitude functions. */ +inline __device__ float cubic_g0(float a) +{ + return cubic_w0(a) + cubic_w1(a); +} +inline __device__ float cubic_g1(float a) +{ + return cubic_w2(a) + cubic_w3(a); +} + +/* h0 and h1 are the two offset functions */ +inline __device__ float cubic_h0(float a) +{ + return (cubic_w1(a) / cubic_g0(a)) - 1.0f; +} +inline __device__ float cubic_h1(float a) +{ + return (cubic_w3(a) / cubic_g1(a)) + 1.0f; +} + +template +inline __device__ float interp_tricubic_nanovdb(S &s, float x, float y, float z) +{ + float px = floorf(x); + float py = floorf(y); + float pz = floorf(z); + float fx = x - px; + float fy = y - py; + float fz = z - pz; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float g0y = cubic_g0(fy); + float g1y = cubic_g1(fy); + float g0z = cubic_g0(fz); + float g1z = cubic_g1(fz); + + float x0 = px + cubic_h0(fx); + float x1 = px + cubic_h1(fx); + float y0 = py + cubic_h0(fy); + float y1 = py + cubic_h1(fy); + float z0 = pz + cubic_h0(fz); + float z1 = pz + cubic_h1(fz); + + using 
namespace nanovdb; + + return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) + + g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) + + g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) + + g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1)))); +} + +inline __device__ float _LERP_(float t, float s1, float s2) +{ + //return (1 - t) * s1 + t * s2; + return fma(t, s2, fma(-t, s1, s1)); +} + +template +inline __device__ float nanoSampling(Acc& acc, nanovdb::Vec3f& point_indexd) { + + using GridTypeNVDB = nanovdb::NanoGrid; + + if constexpr(3 > Order) { + using Sampler = nanovdb::SampleFromVoxels; + return Sampler(acc)(point_indexd); + } + + if constexpr(3 == Order) { + nanovdb::SampleFromVoxels s(acc); + return interp_tricubic_nanovdb(s, point_indexd[0], point_indexd[1], point_indexd[2]); + } + + if constexpr(4 == Order) { + RadiancePRD* prd = getPRD(); + auto uuu = nanovdb::Vec3f(prd->rndf(), prd->rndf(), prd->rndf()); + uuu -= nanovdb::Vec3f(0.5f); + auto pick = nanovdb::RoundDown(point_indexd + uuu); + auto coord = nanovdb::Coord(pick[0], pick[1], pick[2]); + return acc.getValue(coord); + } + + return 0.0f; + + // auto point_floor = nanovdb::RoundDown(point_indexd); + // auto point_a = nanovdb::Coord(point_floor[0], point_floor[1], point_floor[2]); + // auto delta = point_indexd - point_floor; + + // auto value_000 = acc.getValue(point_a); + // auto value_100 = acc.getValue(point_a + nanovdb::Coord(1, 0, 0)); + // auto value_010 = acc.getValue(point_a + nanovdb::Coord(0, 1, 0)); + // auto value_110 = acc.getValue(point_a + nanovdb::Coord(1, 1, 0)); + // auto value_001 = acc.getValue(point_a + nanovdb::Coord(0, 0, 1)); + // auto value_101 = acc.getValue(point_a + nanovdb::Coord(1, 0, 1)); + // auto value_011 = acc.getValue(point_a + nanovdb::Coord(0, 1, 1)); + // auto value_111 = acc.getValue(point_a + nanovdb::Coord(1, 1, 1)); + + // auto value_00 = _LERP_(delta[0], value_000, value_100); + // 
auto value_10 = _LERP_(delta[0], value_010, value_110); + // auto value_01 = _LERP_(delta[0], value_001, value_101); + // auto value_11 = _LERP_(delta[0], value_011, value_111); + + // auto value_0 = _LERP_(delta[1], value_00, value_10); + // auto value_1 = _LERP_(delta[1], value_01, value_11); + + // return _LERP_(delta[2], value_0, value_1); +} + +template +static __inline__ __device__ vec2 samplingVDB(const unsigned long long grid_ptr, vec3 att_pos) { + using GridTypeNVDB = nanovdb::NanoGrid; + + const auto* _grid = reinterpret_cast(grid_ptr); + const auto& _acc = _grid->tree().getAccessor(); + + auto pos_indexed = reinterpret_cast(att_pos); + + if constexpr(WorldSpace) + { + pos_indexed = _grid->worldToIndexF(pos_indexed); + } //_grid->tree().root().maximum(); + + return vec2 { nanoSampling(_acc, pos_indexed), _grid->tree().root().maximum() }; +} + +struct VolumeIn2 { + float3 pos; + float sigma_t; + uint32_t* seed; + unsigned long long sbt_ptr; + + inline float rndf() { + return rnd(*seed); + } + + vec3 _local_pos_ = vec3(CUDART_NAN_F); + vec3 localPosLazy() { + if (isfinite(_local_pos_.x)) return _local_pos_; + + using GridTypeNVDB = GridTypeNVDB0; + const HitGroupData* sbt_data = reinterpret_cast( sbt_ptr ); + + assert(sbt_data != nullptr); + + const auto grid_ptr = sbt_data->vdb_grids[0]; + const auto* _grid = reinterpret_cast(grid_ptr); + //const auto& _acc = _grid->tree().getAccessor(); + auto pos_indexed = reinterpret_cast(pos); + pos_indexed = _grid->worldToIndexF(pos_indexed); + + _local_pos_ = reinterpret_cast(pos_indexed); + return _local_pos_; + }; + + vec3 _uniform_pos_ = vec3(CUDART_NAN_F); + vec3 uniformPosLazy() { + if (isfinite(_uniform_pos_.x)) return _uniform_pos_; + + using GridTypeNVDB = GridTypeNVDB0; + const HitGroupData* sbt_data = reinterpret_cast( sbt_ptr ); + + assert(sbt_data != nullptr); + + const auto grid_ptr = sbt_data->vdb_grids[0]; + const auto* _grid = reinterpret_cast(grid_ptr); + + auto bbox = _grid->indexBBox(); + + 
nanovdb::Coord boundsMin( bbox.min() ); + nanovdb::Coord boundsMax( bbox.max() + nanovdb::Coord( 1 ) ); // extend by one unit + + vec3 min = { + static_cast( boundsMin[0] ), + static_cast( boundsMin[1] ), + static_cast( boundsMin[2] )}; + vec3 max = { + static_cast( boundsMax[0] ), + static_cast( boundsMax[1] ), + static_cast( boundsMax[2] )}; + + auto local_pos = localPosLazy(); + + auto _uniform_pos_ = (local_pos - min) / (max - min); + _uniform_pos_ = clamp(_uniform_pos_, vec3(0.0f), vec3(1.0f)); + + // assert(_uniform_pos_.x >= 0); + // assert(_uniform_pos_.y >= 0); + // assert(_uniform_pos_.z >= 0); + return _uniform_pos_; + }; +}; + + +extern "C" __device__ VolumeOut __direct_callable__evalmat(const float4* uniforms, VolumeIn& _attrs) { + + VolumeIn2 attrs{_attrs.pos, _attrs.sigma_t, _attrs.seed, _attrs.sbt_ptr }; + auto& prd = attrs; + + auto att_pos = attrs.pos; + auto att_clr = vec3(0); + auto att_uv = vec3(0); + auto att_nrm = vec3(0); + auto att_tang = vec3(0); + + HitGroupData* sbt_data = reinterpret_cast(attrs.sbt_ptr); + auto zenotex = sbt_data->textures; + auto vdb_grids = sbt_data->vdb_grids; + auto vdb_max_v = sbt_data->vdb_max_v; + +#ifndef _FALLBACK_ + + //GENERATED_BEGIN_MARK + auto vol_sample_anisotropy = 0.0f; + auto vol_sample_density = 0.0f; + + vec3 vol_sample_emission = vec3(0.0f); + vec3 vol_sample_albedo = vec3(0.5f); + //GENERATED_END_MARK +#else + auto vol_sample_anisotropy = 0.0f; + auto vol_sample_density = 0.1f; + + vec3 tmp = { 1, 0, 1 }; + + vec3 vol_sample_emission = tmp / 50.f; + vec3 vol_sample_albedo = tmp; +#endif // _FALLBACK_ + +VolumeOut output; + +#if _USING_NANOVDB_ + + output.albedo = clamp(vol_sample_albedo, 0.0f, 1.0f); + output.anisotropy = clamp(vol_sample_anisotropy, -1.0f, 1.0f); + + output.density = clamp(vol_sample_density, 0.0f, 1.0f); + output.emission = max(vol_sample_emission, vec3(0.0f)); + + if constexpr(VolumeEmissionScaler == VolumeEmissionScalerType::Raw) { + //output.emission = output.emission; + } 
else if constexpr(VolumeEmissionScaler == VolumeEmissionScalerType::Density) { + output.emission = output.density * output.emission; + } else if constexpr(VolumeEmissionScaler == VolumeEmissionScalerType::Absorption) { + + auto sigma_t = attrs.sigma_t; + + float sigma_a = sigma_t * output.density * average(1.0f - output.albedo); + sigma_a = fmaxf(sigma_a, 0.0f); + auto tmp = output.emission * sigma_a; + output.step_scale = 1.0f / fmaxf(sigma_t, average(tmp)); + output.emission = tmp / sigma_t; + } + +#else + //USING 3D ARRAY + //USING 3D Noise +#endif + return output; +} \ No newline at end of file diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index 45249da79f..315e760246 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -11,157 +11,9 @@ #include "IOMat.h" #include "Light.h" -#define _SPHERE_ 0 -//COMMON_CODE - #include "DisneyBRDF.h" #include "DisneyBSDF.h" -template -static __inline__ __device__ MatOutput evalMat(cudaTextureObject_t zenotex[], float4* uniforms, MatInput const &attrs) { - - /* MODMA */ - auto att_pos = attrs.pos; - auto att_clr = attrs.clr; - auto att_uv = attrs.uv; - auto att_nrm = attrs.nrm; - auto att_tang = attrs.tang; - auto att_instPos = attrs.instPos; - auto att_instNrm = attrs.instNrm; - auto att_instUv = attrs.instUv; - auto att_instClr = attrs.instClr; - auto att_instTang = attrs.instTang; - auto att_NoL = attrs.NoL; - auto att_LoV = attrs.LoV; - auto att_N = attrs.N; - auto att_T = attrs.T; - auto att_L = attrs.L; - auto att_V = attrs.V; - auto att_H = attrs.H; - auto att_reflectance = attrs.reflectance; - auto att_fresnel = attrs.fresnel; - /** generated code here beg **/ - //GENERATED_BEGIN_MARK - /* MODME */ - float mat_base = 1.0f; - vec3 mat_basecolor = vec3(1.0f, 1.0f, 1.0f); - float mat_roughness = 0.5f; - float mat_metallic = 0.0f; - vec3 mat_metalColor = vec3(1.0f,1.0f,1.0f); - float mat_specular = 0.0f; - float mat_specularTint = 0.0f; - 
float mat_anisotropic = 0.0f; - float mat_anisoRotation = 0.0f; - - float mat_subsurface = 0.0f; - vec3 mat_sssParam = vec3(0.0f,0.0f,0.0f); - vec3 mat_sssColor = vec3(0.0f,0.0f,0.0f); - float mat_scatterDistance = 0.0f; - float mat_scatterStep = 0.0f; - - float mat_sheen = 0.0f; - float mat_sheenTint = 0.0f; - - - float mat_clearcoat = 0.0f; - vec3 mat_clearcoatColor = vec3(1.0f,1.0f,1.0f); - float mat_clearcoatRoughness = 0.0f; - float mat_clearcoatIOR = 1.5f; - float mat_opacity = 0.0f; - - float mat_specTrans = 0.0f; - vec3 mat_transColor = vec3(1.0f,1.0f,1.0f); - vec3 mat_transTint = vec3(1.0f,1.0f,1.0f); - float mat_transTintDepth = 0.0f; - float mat_transDistance = 0.0f; - vec3 mat_transScatterColor = vec3(1.0f,1.0f,1.0f); - float mat_ior = 1.0f; - - float mat_flatness = 0.0f; - float mat_thin = 0.0f; - float mat_doubleSide= 0.0f; - float mat_smoothness = 0.0f; - vec3 mat_normal = vec3(0.0f, 0.0f, 1.0f); - float mat_emissionIntensity = float(0); - vec3 mat_emission = vec3(1.0f, 1.0f, 1.0f); - float mat_displacement = 0.0f; - float mat_shadowReceiver = 0.0f; - float mat_NoL = 1.0f; - float mat_LoV = 1.0f; - vec3 mat_reflectance = att_reflectance; - - //GENERATED_END_MARK - /** generated code here end **/ - MatOutput mats; - if constexpr(isDisplacement) - { - mats.reflectance = mat_reflectance; - return mats; - }else { - /* MODME */ - mats.basecolor = mat_base * mat_basecolor; - mats.roughness = clamp(mat_roughness, 0.01, 0.99); - mats.metallic = clamp(mat_metallic, 0.0f, 1.0f); - mats.metalColor = mat_metalColor; - mats.specular = mat_specular; - mats.specularTint = mat_specularTint; - mats.anisotropic = clamp(mat_anisotropic, 0.0f, 1.0f); - mats.anisoRotation = clamp(mat_anisoRotation, 0.0f, 1.0f); - - mats.subsurface = mat_subsurface; - mats.sssColor = mat_sssColor; - mats.sssParam = mat_sssParam; - mats.scatterDistance = max(0.0f,mat_scatterDistance); - mats.scatterStep = clamp(mat_scatterStep,0.0f,1.0f); - - mats.sheen = mat_sheen; - mats.sheenTint = 
mat_sheenTint; - - mats.clearcoat = clamp(mat_clearcoat, 0.0f, 1.0f); - mats.clearcoatColor = mat_clearcoatColor; - mats.clearcoatRoughness = clamp(mat_clearcoatRoughness, 0.01, 0.99); - mats.clearcoatIOR = mat_clearcoatIOR; - - mats.specTrans = clamp(mat_specTrans, 0.0f, 1.0f); - mats.transColor = mat_transColor; - mats.transTint = mat_transTint; - mats.transTintDepth = max(0.0f,mat_transTintDepth); - mats.transDistance = max(mat_transDistance,0.1f); - mats.transScatterColor = mat_transScatterColor; - mats.ior = max(0.0f,mat_ior); - - - mats.opacity = mat_opacity; - mats.nrm = mat_normal; - mats.emission = mat_emissionIntensity * mat_emission; - - - - mats.flatness = mat_flatness; - mats.thin = mat_thin; - mats.doubleSide = mat_doubleSide; - mats.shadowReceiver = mat_shadowReceiver; - - - mats.smoothness = mat_smoothness; - return mats; - } -} - -static __inline__ __device__ MatOutput evalMaterial(cudaTextureObject_t zenotex[], float4* uniforms, MatInput const &attrs) -{ - return evalMat(zenotex, uniforms, attrs); -} - -static __inline__ __device__ MatOutput evalGeometry(cudaTextureObject_t zenotex[], float4* uniforms, MatInput const &attrs) -{ - return evalMat(zenotex, uniforms, attrs); -} - -static __inline__ __device__ MatOutput evalReflectance(cudaTextureObject_t zenotex[], float4* uniforms, MatInput const &attrs) -{ - return evalMat(zenotex, uniforms, attrs); -} __forceinline__ __device__ float3 interp(float2 barys, float3 a, float3 b, float3 c) { float w0 = 1 - barys.x - barys.y; @@ -295,7 +147,8 @@ extern "C" __global__ void __anyhit__shadow_cutout() unsigned short isLight = 0;//rt_data->lightMark[vert_aux_offset + primIdx]; #endif - MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); + //MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); + MatOutput mats = optixDirectCall( rt_data->dc_index, rt_data->textures, rt_data->uniforms, attrs ); if(length(attrs.tang)>0) { @@ -526,7 +379,9 @@ extern "C" __global__ 
void __closesthit__radiance() #endif - MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); + //MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); + MatOutput mats = optixDirectCall( rt_data->dc_index, rt_data->textures, rt_data->uniforms, attrs ); + #if _SPHERE_ diff --git a/zenovis/xinxinoptix/DisneyBSDF.h b/zenovis/xinxinoptix/DisneyBSDF.h index df5a8c951a..51c729a778 100644 --- a/zenovis/xinxinoptix/DisneyBSDF.h +++ b/zenovis/xinxinoptix/DisneyBSDF.h @@ -53,11 +53,13 @@ namespace DisneyBSDF{ } static __inline__ __device__ void - setup_subsurface_radius(float eta, vec3 albedo, vec3 &radius) + setup_subsurface_radius(float eta, vec3 albedo, vec3 &radius, bool fixedRadius) { - #if _SSS_FIXED_RADIUS_ - radius = radius * 0.25f / M_PIf; - #else + if (fixedRadius) { + radius = radius * 0.25f / M_PIf; + return; + } + float inv_eta = 1.0f/eta; float F_dr = inv_eta * (-1.440f * inv_eta + 0.710f) + 0.668f + 0.0636f * eta; float fourthirdA = (4.0f / 3.0f) * (1.0f + F_dr) / @@ -67,7 +69,6 @@ namespace DisneyBSDF{ alpha_prime.y = bssrdf_dipole_compute_alpha_prime(albedo.y, fourthirdA); alpha_prime.z = bssrdf_dipole_compute_alpha_prime(albedo.z, fourthirdA); radius = radius * sqrt(3.0f * abs(vec3(1.0) - alpha_prime)); - #endif } static __inline__ __device__ void subsurface_random_walk_remap(const float albedo, @@ -118,10 +119,10 @@ namespace DisneyBSDF{ } static __inline__ __device__ - void CalculateExtinction2(vec3 albedo, vec3 radius, vec3 &sigma_t, vec3 &alpha, float eta) + void CalculateExtinction2(vec3 albedo, vec3 radius, vec3 &sigma_t, vec3 &alpha, float eta, bool fixedRadius) { vec3 r = radius; - setup_subsurface_radius(eta, albedo, r); + setup_subsurface_radius(eta, albedo, r, fixedRadius); subsurface_random_walk_remap(albedo.x, r.x, 0, sigma_t.x, alpha.x); subsurface_random_walk_remap(albedo.y, r.y, 0, sigma_t.y, alpha.y); subsurface_random_walk_remap(albedo.z, r.z, 0, sigma_t.z, alpha.z); @@ -636,7 +637,7 @@ namespace 
DisneyBSDF{ prd->ss_alpha = color; if (isSS) { medium = PhaseFunctions::isotropic; - CalculateExtinction2(color, sssRadius, prd->sigma_t, prd->ss_alpha, 1.4f); + CalculateExtinction2(color, sssRadius, prd->sigma_t, prd->ss_alpha, 1.4f, mat.sssFxiedRadius); } } } diff --git a/zenovis/xinxinoptix/IOMat.h b/zenovis/xinxinoptix/IOMat.h index 9af9677126..90d01a0d73 100644 --- a/zenovis/xinxinoptix/IOMat.h +++ b/zenovis/xinxinoptix/IOMat.h @@ -34,11 +34,10 @@ struct MatOutput { float transDistance; vec3 transScatterColor; - - float subsurface; vec3 sssColor; vec3 sssParam; + bool sssFxiedRadius; float scatterDistance; float scatterStep; float smoothness; diff --git a/zenovis/xinxinoptix/Light.cu b/zenovis/xinxinoptix/Light.cu index bc0af5b820..68ac51703b 100644 --- a/zenovis/xinxinoptix/Light.cu +++ b/zenovis/xinxinoptix/Light.cu @@ -14,15 +14,6 @@ #include "Sampling.h" #include "LightTree.h" -//COMMON_CODE - -static __inline__ __device__ void evalSurface(float4* uniforms) { - - //GENERATED_BEGIN_MARK - - //GENERATED_END_MARK -} - static __inline__ __device__ bool checkLightGAS(uint instanceId) { return ( instanceId >= params.maxInstanceID-2 ); } diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index c6b4652588..0fd85f8c2c 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -131,7 +131,7 @@ inline void executeOptixTask(OptixTask theTask, tbb::task_group& _c_group) { m_maxNumAdditionalTasks, &numAdditionalTasksCreated ); - for( unsigned int i = 0; i < numAdditionalTasksCreated; ++i ) + for( size_t i = 0; i < numAdditionalTasksCreated; ++i ) { // Capture additionalTasks[i] by value since it will go out of scope. 
OptixTask task = additionalTasks[i]; @@ -164,9 +164,8 @@ static std::vector readData(std::string const& filename) return data; } -inline bool createModule(OptixModule &m, OptixDeviceContext &context, const char *source, const char *location, tbb::task_group* _c_group = nullptr) +inline bool createModule(OptixModule &module, OptixDeviceContext &context, const char *source, const char *name, const char *macro=nullptr, tbb::task_group* _c_group = nullptr) { - //OptixModule m; OptixModuleCompileOptions module_compile_options = {}; module_compile_options.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT; #if defined( NDEBUG ) @@ -185,17 +184,23 @@ inline bool createModule(OptixModule &m, OptixDeviceContext &context, const char //TODO: the file path problem bool is_success=false; - const std::vector compilerOptions { - "-std=c++17", "-default-device", //"-extra-device-vectorization" + std::vector compilerOptions { + "-std=c++17", "-default-device" + //,"-extra-device-vectorization" #if !defined( NDEBUG ) - "-lineinfo", //"-G"//"--dopt=on", + ,"-lineinfo" //"-G"//"--dopt=on", #endif - //"--gpu-architecture=compute_60", - "--relocatable-device-code=true" - //"--extensible-whole-program" + // "--gpu-architecture=compute_60", + ,"--relocatable-device-code=true" + // "--extensible-whole-program" + ,"--split-compile=0" }; - const char* input = sutil::getInputData( nullptr, nullptr, source, location, inputSize, is_success, nullptr, compilerOptions); + if (macro != nullptr) { + compilerOptions.push_back(macro); + } + + const char* input = sutil::getInputData( source, macro, name, inputSize, is_success, nullptr, compilerOptions); if(is_success==false) { @@ -203,9 +208,8 @@ inline bool createModule(OptixModule &m, OptixDeviceContext &context, const char } if (_c_group == nullptr) { - OPTIX_CHECK( - optixModuleCreate( context, &module_compile_options, &pipeline_compile_options, input, inputSize, log, &sizeof_log, &m ) + optixModuleCreate( context, &module_compile_options, 
&pipeline_compile_options, input, inputSize, log, &sizeof_log, &module ) ); } else { @@ -218,7 +222,7 @@ inline bool createModule(OptixModule &m, OptixDeviceContext &context, const char input, inputSize, log, &sizeof_log, - &m, + &module, &firstTask) ); @@ -794,118 +798,167 @@ inline void addTexture(std::string path) zeno::log_info("-{}", i->first); } } -struct rtMatShader -{ - raii m_ptx_module ; - OptixModule* moduleIS = nullptr; - //the below two things are just like vertex shader and frag shader in real time rendering - //the two are linked to codes modeling the rayHit and occlusion test of an particular "Material" - //of an Object. - raii m_radiance_hit_group ; - raii m_occlusion_hit_group ; - std::string m_shaderFile ; - std::string m_hittingEntry ; - std::string m_shadingEntry ; - std::string m_occlusionEntry ; - std::map m_texs; - bool has_vdb{}; - void clearTextureRecords() - { - m_texs.clear(); - } - void addTexture(int i, std::string name) - { - m_texs[i] = name; - } - cudaTextureObject_t getTexture(int i) - { - if(m_texs.find(i)!=m_texs.end()) - { - if(g_tex.find(m_texs[i])!=g_tex.end()) - { - return g_tex[m_texs[i]]->texture; - } - return 0; - } - return 0; +struct OptixShaderCore { + raii module {}; + OptixModule* moduleIS = nullptr; + + raii m_radiance_hit_group {}; + raii m_occlusion_hit_group {}; + + std::string _source; + + std::string _hittingEntry; + std::string _shadingEntry; + std::string _occlusionEntry; + + OptixShaderCore() {} + ~OptixShaderCore() { + module.reset(); + moduleIS = nullptr; + + m_radiance_hit_group.reset(); + m_occlusion_hit_group.reset(); } - rtMatShader() {} - rtMatShader(const char *shaderFile, std::string shadingEntry, std::string occlusionEntry) + + OptixShaderCore(const char *shaderSource, std::string shadingEntry, std::string occlusionEntry) { - m_shaderFile = shaderFile; - m_shadingEntry = shadingEntry; - m_occlusionEntry = occlusionEntry; + _source = shaderSource; + + _shadingEntry = shadingEntry; + _occlusionEntry = 
occlusionEntry; } - rtMatShader(const char *shaderFile, std::string shadingEntry, std::string occlusionEntry, std::string hittingEntry) + OptixShaderCore(const char *shaderSource, std::string shadingEntry, std::string occlusionEntry, std::string hittingEntry) { - m_shaderFile = shaderFile; - m_shadingEntry = shadingEntry; - m_occlusionEntry = occlusionEntry; + _source = shaderSource; - m_hittingEntry = hittingEntry; + _hittingEntry = hittingEntry; + _shadingEntry = shadingEntry; + _occlusionEntry = occlusionEntry; } - bool loadProgram(uint idx, tbb::task_group* _c_group = nullptr) + bool loadProgram(uint idx, const char* macro=nullptr, tbb::task_group* _c_group = nullptr) { - // try { - // createModule(m_ptx_module.reset(), context, m_shaderFile.c_str(), "MatShader.cu"); - // createRTProgramGroups(context, m_ptx_module, - // "OPTIX_PROGRAM_GROUP_KIND_CLOSEHITGROUP", - // m_shadingEntry, m_radiance_hit_group); - - // createRTProgramGroups(context, m_ptx_module, - // "OPTIX_PROGRAM_GROUP_KIND_ANYHITGROUP", - // m_occlusionEntry, m_occlusion_hit_group); - // } catch (sutil::Exception const &e) { - // throw std::runtime_error((std::string)"cannot create program group. 
Log:\n" + e.what() + "\n===BEG===\n" + m_shaderFile + "\n===END===\n"); - // } - std::string tmp_name = "MatShader.cu"; tmp_name = "$" + std::to_string(idx) + tmp_name; - - if(createModule(m_ptx_module.reset(), context, m_shaderFile.c_str(), tmp_name.c_str(), _c_group)) + + if(createModule(module.reset(), context, _source.c_str(), tmp_name.c_str(), macro, _c_group)) { std::cout<<"module created"< core{}; + + std::string callable {}; + raii callable_module {}; + raii callable_prog_group {}; + + std::map m_texs {}; + bool has_vdb {}; + std::string parameters{}; + + OptixShaderWrapper() = default; + ~OptixShaderWrapper() = default; + + OptixShaderWrapper(OptixShaderWrapper&& ref) = default; + + OptixShaderWrapper(std::shared_ptr _core_, const std::string& callableSource) + { + core = _core_; callable = callableSource; + } + + bool loadProgram(uint idx, bool fallback=false, tbb::task_group* _c_group = nullptr) + { + std::string tmp_name = "Callable.cu"; + tmp_name = "$" + std::to_string(idx) + tmp_name; + + std::string macro; + if (fallback) { + macro = "--define-macro=_FALLBACK_"; + } + + auto callable_done = createModule(callable_module.reset(), context, callable.c_str(), tmp_name.c_str(), macro.empty()? 
nullptr:macro.c_str()); + if (callable_done) { + + // Callable programs + OptixProgramGroupOptions callable_prog_group_options = {}; + OptixProgramGroupDesc callable_prog_group_descs[1] = {}; + + callable_prog_group_descs[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + callable_prog_group_descs[0].callables.moduleDC = callable_module; + callable_prog_group_descs[0].callables.entryFunctionNameDC = "__direct_callable__evalmat"; + + char LOG[2048]; + size_t LOG_SIZE = sizeof( LOG ); + OPTIX_CHECK( + optixProgramGroupCreate( context, callable_prog_group_descs, 1, &callable_prog_group_options, LOG, &LOG_SIZE, &callable_prog_group.reset()); + ); + return true; + } + + return false; } + void clearTextureRecords() + { + m_texs.clear(); + } + void addTexture(int i, std::string name) + { + m_texs[i] = name; + } + cudaTextureObject_t getTexture(int i) + { + if(m_texs.find(i)!=m_texs.end()) + { + if(g_tex.find(m_texs[i])!=g_tex.end()) + { + return g_tex[m_texs[i]]->texture; + } + return 0; + } + return 0; + } }; -inline std::vector rtMaterialShaders;//just have an arry of shaders + +inline std::vector rtMaterialShaders;//just have an arry of shaders + inline void createPipeline() { OptixPipelineLinkOptions pipeline_link_options = {}; pipeline_link_options.maxTraceDepth = 2; -// #if defined( NDEBUG ) -// pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL; -// #else -// pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; -// #endif - size_t num_progs = 3 + rtMaterialShaders.size() * 2; + num_progs += rtMaterialShaders.size(); // callables; + OptixProgramGroup* program_groups = new OptixProgramGroup[num_progs]; program_groups[0] = raygen_prog_group; program_groups[1] = radiance_miss_group; program_groups[2] = occlusion_miss_group; for(size_t i=0;im_radiance_hit_group; + program_groups[3 + i*2 + 1] = rtMaterialShaders[i].core->m_occlusion_hit_group; + + program_groups[3 + 2 * rtMaterialShaders.size() + i] = rtMaterialShaders[i].callable_prog_group; 
} char log[2048]; size_t sizeof_log = sizeof( log ); @@ -933,8 +986,9 @@ inline void createPipeline() OPTIX_CHECK( optixUtilAccumulateStackSizes( occlusion_miss_group, &stack_sizes, pipeline ) ); for(int i=0;im_radiance_hit_group, &stack_sizes, pipeline ) ); + OPTIX_CHECK( optixUtilAccumulateStackSizes( rtMaterialShaders[i].core->m_occlusion_hit_group, &stack_sizes, pipeline ) ); + OPTIX_CHECK( optixUtilAccumulateStackSizes( rtMaterialShaders[i].callable_prog_group, &stack_sizes, pipeline ) ); } uint32_t max_trace_depth = 2; uint32_t max_cc_depth = 0; diff --git a/zenovis/xinxinoptix/SDK/sutil/sutil.cpp b/zenovis/xinxinoptix/SDK/sutil/sutil.cpp index a87d4d24cd..c5f72571a6 100644 --- a/zenovis/xinxinoptix/SDK/sutil/sutil.cpp +++ b/zenovis/xinxinoptix/SDK/sutil/sutil.cpp @@ -877,15 +877,14 @@ const char *lookupIncFile(const char *name) { return getIncFileTab().at(it - pathtab.begin()); } -static bool getPtxFromCuString( std::string& ptx, - const char* sample_directory, +inline bool getPtxFromCuString( std::string& ptx, const char* cu_source, const char* name, const char** log_string, const std::vector& compiler_options) { // Create program - nvrtcProgram prog = 0; + nvrtcProgram prog; NVRTC_CHECK_ERROR( nvrtcCreateProgram( &prog, cu_source, name, getIncFileTab().size(), getIncFileTab().data(), getIncPathTab().data() ) ); // Gather NVRTC options @@ -1093,10 +1092,9 @@ static const char* getOptixHeader() { } #endif -const char* getInputData( const char* sample, - const char* sampleDir, - const char* filename, - const char* location, +const char* getInputData( const char* source, + const char* macro, + const char* name, size_t& dataSize, bool & is_success, const char** log, @@ -1106,7 +1104,7 @@ const char* getInputData( const char* sample, *log = NULL; std::string * ptx, cu; - std::string key = std::string( filename ) + ";" + ( sample ? sample : "" ); + std::string key = std::string( source ) + (macro!=nullptr? 
std::string(macro):""); std::map::iterator elem = g_ptxSourceCache.map.find( key ); if( elem == g_ptxSourceCache.map.end() ) @@ -1115,7 +1113,7 @@ const char* getInputData( const char* sample, #if CUDA_NVRTC_ENABLED //getCuStringFromFile( cu, location, sampleDir, filename ); //cu.replace(cu.find("#include \n"), strlen("#include \n"), getOptixHeader()); - is_success = getPtxFromCuString( *ptx, sampleDir, filename, location, log, compilerOptions ); + is_success = getPtxFromCuString( *ptx, source, name, log, compilerOptions ); #else getInputDataFromFile( *ptx, sample, filename ); #endif diff --git a/zenovis/xinxinoptix/SDK/sutil/sutil.h b/zenovis/xinxinoptix/SDK/sutil/sutil.h index 2cec06bbbe..107287e83b 100644 --- a/zenovis/xinxinoptix/SDK/sutil/sutil.h +++ b/zenovis/xinxinoptix/SDK/sutil/sutil.h @@ -144,18 +144,15 @@ SUTILAPI void calculateCameraVariables( double SUTILAPI currentTime(); // Get input data, either pre-compiled with NVCC or JIT compiled by NVRTC. -SUTILAPI const char* getInputData( const char* sampleName, // Name of the sample, used to locate the input file. NULL = only search the common /cuda dir - const char* sampleDir, // Directory name for the sample (typically the same as the sample name). - const char* filename, // Cuda C input file name - const char* location, +SUTILAPI const char* getInputData( const char* source, + const char* macro, + const char* name, size_t& dataSize, bool &is_success, const char** log = NULL, // (Optional) pointer to compiler log string. If *log == NULL there is no output. Only valid until the next getInputData call const std::vector& compilerOptions = {CUDA_NVRTC_OPTIONS} ); // Optional vector of compiler options. - - // Ensures that width and height have the minimum size to prevent launch errors. SUTILAPI void ensureMinimumSize( int& width, // Will be assigned the minimum suitable width if too small. 
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 5face93b90..139e9dabda 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef __linux__ #include #include @@ -68,6 +69,7 @@ #include #include #include +#include #include "LightBounds.h" #include "LightTree.h" @@ -145,9 +147,13 @@ struct Record T data; }; +struct EmptyData {}; + typedef Record RayGenRecord; typedef Record MissRecord; + typedef Record HitGroupRecord; +typedef Record CallablesRecord; //struct Vertex @@ -237,6 +243,7 @@ struct PathTracerState raii d_raygen_record; raii d_miss_records; raii d_hitgroup_records; + raii d_callable_records; OptixShaderBindingTable sbt = {}; }; @@ -1120,6 +1127,8 @@ static void createSBT( PathTracerState& state ) state.d_raygen_record.reset(); state.d_miss_records.reset(); state.d_hitgroup_records.reset(); + state.d_callable_records.reset(); + state.d_gas_output_buffer.reset(); state.accum_buffer_p.reset(); state.albedo_buffer_p.reset(); @@ -1169,6 +1178,7 @@ static void createSBT( PathTracerState& state ) )); std::vector hitgroup_records(hitgroup_record_count); + std::vector callable_records(shader_count); for( int j = 0; j < shader_count; ++j ) { @@ -1177,6 +1187,8 @@ static void createSBT( PathTracerState& state ) const uint sbt_idx = RAY_TYPE_COUNT * j; + OPTIX_CHECK( optixSbtRecordPackHeader( shader_ref.callable_prog_group, &callable_records[j] ) ); + if (!has_vdb) { hitgroup_records[sbt_idx] = {}; @@ -1201,8 +1213,8 @@ static void createSBT( PathTracerState& state ) hitgroup_records[sbt_idx+1] = hitgroup_records[sbt_idx]; // SBT for occlusion ray-type for ith material - OPTIX_CHECK( optixSbtRecordPackHeader( shader_ref.m_radiance_hit_group, &hitgroup_records[sbt_idx] ) ); - OPTIX_CHECK( optixSbtRecordPackHeader( shader_ref.m_occlusion_hit_group, &hitgroup_records[sbt_idx+1] ) ); + OPTIX_CHECK( 
optixSbtRecordPackHeader( shader_ref.core->m_radiance_hit_group, &hitgroup_records[sbt_idx] ) ); + OPTIX_CHECK( optixSbtRecordPackHeader( shader_ref.core->m_occlusion_hit_group, &hitgroup_records[sbt_idx+1] ) ); } else { @@ -1233,13 +1245,29 @@ static void createSBT( PathTracerState& state ) rec.data.textures[t] = shader_ref.getTexture(t); } + { + using namespace rapidjson; + Document document; + document.Parse(shader_ref.parameters.c_str()); + + auto vol_depth = document["vol_depth"].GetInt(); + auto vol_extin = document["vol_extinction"].GetFloat(); + + rec.data.vol_depth = vol_depth; + rec.data.vol_extinction = vol_extin; + } + hitgroup_records[sbt_idx] = rec; hitgroup_records[sbt_idx+1] = rec; - OPTIX_CHECK(optixSbtRecordPackHeader( shader_ref.m_radiance_hit_group, &hitgroup_records[sbt_idx] ) ); - OPTIX_CHECK(optixSbtRecordPackHeader( shader_ref.m_occlusion_hit_group, &hitgroup_records[sbt_idx+1] ) ); + OPTIX_CHECK(optixSbtRecordPackHeader( shader_ref.core->m_radiance_hit_group, &hitgroup_records[sbt_idx] ) ); + OPTIX_CHECK(optixSbtRecordPackHeader( shader_ref.core->m_occlusion_hit_group, &hitgroup_records[sbt_idx+1] ) ); } + + + hitgroup_records[sbt_idx].data.dc_index = j; + hitgroup_records[sbt_idx+1].data.dc_index = j; } CUDA_CHECK( cudaMemcpy( @@ -1257,6 +1285,18 @@ static void createSBT( PathTracerState& state ) state.sbt.hitgroupRecordStrideInBytes = static_cast( hitgroup_record_size ); state.sbt.hitgroupRecordCount = hitgroup_records.size(); //state.sbt.exceptionRecord; + + { + raii& d_callable_records = state.d_callable_records; + size_t sizeof_callable_record = sizeof( CallablesRecord ); + CUDA_CHECK( cudaMalloc( reinterpret_cast( &d_callable_records ), sizeof_callable_record * shader_count ) ); + CUDA_CHECK( cudaMemcpy( reinterpret_cast( (CUdeviceptr)d_callable_records ), callable_records.data(), + sizeof_callable_record * shader_count, cudaMemcpyHostToDevice ) ); + + state.sbt.callablesRecordBase = d_callable_records; + 
state.sbt.callablesRecordCount = shader_count; + state.sbt.callablesRecordStrideInBytes = static_cast( sizeof_callable_record ); + } } static void cleanupState( PathTracerState& state ) @@ -2348,75 +2388,114 @@ void buildLightTree() { } } +inline std::map, std::shared_ptr> shaderCoreLUT {}; + void optixupdatematerial(std::vector> &shaders) { camera_changed = true; - //static bool hadOnce = false; - if (OptixUtil::ray_module.handle==0) { - //OPTIX_CHECK( optixModuleDestroy( OptixUtil::ray_module ) ); + CppTimer theTimer; + theTimer.tick(); + //static bool hadOnce = false; + if (OptixUtil::ray_module.handle==0) { + + OptixUtil::_compile_group.run([&] () { + if (!OptixUtil::createModule( OptixUtil::ray_module, state.context, sutil::lookupIncFile("PTKernel.cu"), "PTKernel.cu")) throw std::runtime_error("base ray module failed to compile"); - - } //hadOnce = true; + + OptixUtil::createRenderGroups(state.context, OptixUtil::ray_module); + }); + + OptixUtil::_compile_group.run([&] () { + auto shader_string = sutil::lookupIncFile("DeflMatShader.cu"); + + auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); + shaderCore->loadProgram(0); + shaderCoreLUT.emplace(std::tuple{"DeflMatShader.cu", ShaderMaker::Mesh}, shaderCore); + }); + + OptixUtil::_compile_group.run([&] () { + auto shader_string = sutil::lookupIncFile("DeflMatShader.cu"); + + auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); + shaderCore->moduleIS = &OptixUtil::sphere_module; + shaderCore->loadProgram(0, "--define-macro=_SPHERE_"); + shaderCoreLUT.emplace(std::tuple{"DeflMatShader.cu", ShaderMaker::Sphere}, shaderCore); + }); + + OptixUtil::_compile_group.run([&] () { + auto shader_string = sutil::lookupIncFile("Light.cu"); + + auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); + shaderCore->loadProgram(0); + shaderCoreLUT.emplace(std::tuple{"Light.cu", 
ShaderMaker::Mesh}, shaderCore); + }); + + OptixUtil::_compile_group.run([&] () { + auto shader_string = sutil::lookupIncFile("Light.cu"); + + auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); + shaderCore->moduleIS = &OptixUtil::sphere_module; + shaderCore->loadProgram(0); + shaderCoreLUT.emplace(std::tuple{"Light.cu", ShaderMaker::Sphere}, shaderCore); + }); + + OptixUtil::_compile_group.run([&] () { + auto shader_string = sutil::lookupIncFile("volume.cu"); + + auto shaderCore = std::make_shared(shader_string, + "__closesthit__radiance_volume", "__anyhit__occlusion_volume", "__intersection__volume"); + shaderCore->loadProgram(0); + shaderCoreLUT.emplace(std::tuple{"volume.cu", ShaderMaker::Volume}, shaderCore); + }); + + OptixUtil::_compile_group.wait(); + } //hadOnce = true; OptixUtil::rtMaterialShaders.resize(0); - OptixUtil::rtMaterialShaders.reserve(shaders.size()); + OptixUtil::rtMaterialShaders.resize(shaders.size()); for (int i = 0; i < shaders.size(); i++) { - auto& shader_string = shaders[i]->source; - if (shader_string.empty()) zeno::log_error("shader {} is empty", i); + +OptixUtil::_compile_group.run([&shaders, i] () { auto marker = std::string("//PLACEHOLDER"); auto marker_length = marker.length(); - auto start_marker = shader_string.find(marker); + auto& callable_string = shaders[i]->callable; + auto start_marker = callable_string.find(marker); if (start_marker != std::string::npos) { - auto end_marker = shader_string.find(marker, start_marker + marker_length); + auto end_marker = callable_string.find(marker, start_marker + marker_length); - shader_string.replace(start_marker, marker_length, "/*PLACEHOLDER"); - shader_string.replace(end_marker, marker_length, "PLACEHOLDER*/"); + callable_string.replace(start_marker, marker_length, "/*PLACEHOLDER"); + callable_string.replace(end_marker, marker_length, "PLACEHOLDER*/"); } - const static std::string sphere_macro0 = "#define _SPHERE_ 0"; - const 
static std::string sphere_macro1 = "#define _SPHERE_ 1"; + std::shared_ptr shaderCore = nullptr; + auto key = std::tuple{shaders[i]->filename, shaders[i]->mark}; - switch(shaders[i]->mark) { - case(ShaderMaker::Mesh): { + if (shaderCoreLUT.count(key) > 0) { + shaderCore = shaderCoreLUT.at(key); + } - auto macro_pos = shader_string.find(sphere_macro1); - if (macro_pos != std::string::npos) { - shader_string.replace(macro_pos, sphere_macro1.size(), sphere_macro0); - } + OptixUtil::rtMaterialShaders[i].core = shaderCore; + OptixUtil::rtMaterialShaders[i].parameters = shaders[i]->parameters; + OptixUtil::rtMaterialShaders[i].callable = shaders[i]->callable; - OptixUtil::rtMaterialShaders.emplace_back(shader_string.c_str(), - "__closesthit__radiance", - "__anyhit__shadow_cutout"); + switch(shaders[i]->mark) { + case(ShaderMaker::Mesh): { break; } - case(ShaderMaker::Sphere): { - - auto macro_pos = shader_string.find(sphere_macro0); - if (macro_pos != std::string::npos) { - shader_string.replace(macro_pos, sphere_macro0.size(), sphere_macro1); - } - - OptixUtil::rtMaterialShaders.emplace_back(shader_string.c_str(), - "__closesthit__radiance", - "__anyhit__shadow_cutout"); - OptixUtil::rtMaterialShaders.back().moduleIS = &OptixUtil::sphere_module.handle; + case(ShaderMaker::Sphere): { break; } - case(ShaderMaker::Volume): { - OptixUtil::rtMaterialShaders.emplace_back(shader_string.c_str(), - "__closesthit__radiance_volume", - "__anyhit__occlusion_volume", - "__intersection__volume"); - OptixUtil::rtMaterialShaders.back().has_vdb = true; + case(ShaderMaker::Volume): { + OptixUtil::rtMaterialShaders[i].has_vdb = true; break; } default: {} @@ -2432,10 +2511,10 @@ void optixupdatematerial(std::vector> &shaders) OptixUtil::rtMaterialShaders[i].addTexture(j, texs[j]); } } - } +}); //_compile_group + } //for - CppTimer theTimer; - theTimer.tick(); +OptixUtil::_compile_group.wait(); uint task_count = OptixUtil::rtMaterialShaders.size(); //std::vector task_groups(task_count); @@ 
-2444,15 +2523,11 @@ void optixupdatematerial(std::vector> &shaders) OptixUtil::_compile_group.run([&shaders, i] () { printf("now compiling %d'th shader \n", i); - if(OptixUtil::rtMaterialShaders[i].loadProgram(i, nullptr)==false) + if(OptixUtil::rtMaterialShaders[i].loadProgram(i)==false) { std::cout<<"shader compiling failed, using fallback shader instead"<fallback->c_str(); - //OptixUtil::rtMaterialShaders[i].m_hittingEntry = ""; - //OptixUtil::rtMaterialShaders[i].m_shadingEntry = "__closesthit__radiance"; - //OptixUtil::rtMaterialShaders[i].m_occlusionEntry = "__anyhit__shadow_cutout"; - std::cout<> &shaders) OptixUtil::_compile_group.wait(); theTimer.tock("Done Optix Shader Compile:"); - OptixUtil::createRenderGroups(state.context, OptixUtil::ray_module); if (OptixUtil::sky_tex.has_value()) { state.params.sky_texture = OptixUtil::g_tex[OptixUtil::sky_tex.value()]->texture; state.params.skynx = OptixUtil::sky_nx_map[OptixUtil::sky_tex.value()]; @@ -2517,14 +2591,6 @@ void optixupdateend() { state.radiance_miss_group = OptixUtil::radiance_miss_group; state.occlusion_miss_group = OptixUtil::occlusion_miss_group; - //state.radiance_hit_group = OptixUtil::radiance_hit_group; - //state.occlusion_hit_group = OptixUtil::occlusion_hit_group; - //state.radiance_hit_group2 = OptixUtil::radiance_hit_group2; - //state.occlusion_hit_group2 = OptixUtil::occlusion_hit_group2; - //state.ptx_module2 = createModule(state.context, "optixPathTracer.cu"); - //createModule( state ); - //createProgramGroups( state ); - //createPipeline( state ); createSBT( state ); printf("SBT created \n"); @@ -3671,6 +3737,9 @@ void optixcleanup() { raygen_prog_group .handle=0; radiance_miss_group .handle=0; occlusion_miss_group .handle=0; + + OptixUtil::shaderCoreLUT.clear(); + output_buffer_o .reset(); output_buffer_diffuse .reset(); output_buffer_specular .reset(); diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index 04b8e69488..3cb0c3d6f7 100644 --- 
a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -255,6 +255,9 @@ struct MissData }; struct HitGroupData { + uint16_t dc_index; + uint16_t vol_depth=99; + float vol_extinction=1.0f; //float4* vertices; float4* uv; float4* nrm; diff --git a/zenovis/xinxinoptix/volume.cu b/zenovis/xinxinoptix/volume.cu index c25ef58382..1c76a1a1f4 100644 --- a/zenovis/xinxinoptix/volume.cu +++ b/zenovis/xinxinoptix/volume.cu @@ -12,343 +12,8 @@ #include #include -enum struct VolumeEmissionScalerType { - Raw, Density, Absorption -}; - -//PLACEHOLDER -static const int _vol_depth = 99; -static const float _vol_extinction = 1.0f; using DataTypeNVDB0 = nanovdb::Fp32; using GridTypeNVDB0 = nanovdb::NanoGrid; -#define VolumeEmissionScaler VolumeEmissionScalerType::Raw -//PLACEHOLDER - -#define _USING_NANOVDB_ true -#define _DELTA_TRACKING_ true -//COMMON_CODE - -/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ -inline __device__ float cubic_w0(float a) -{ - return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); -} -inline __device__ float cubic_w1(float a) -{ - return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); -} -inline __device__ float cubic_w2(float a) -{ - return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); -} -inline __device__ float cubic_w3(float a) -{ - return (1.0f / 6.0f) * (a * a * a); -} - -/* g0 and g1 are the two amplitude functions. 
*/ -inline __device__ float cubic_g0(float a) -{ - return cubic_w0(a) + cubic_w1(a); -} -inline __device__ float cubic_g1(float a) -{ - return cubic_w2(a) + cubic_w3(a); -} - -/* h0 and h1 are the two offset functions */ -inline __device__ float cubic_h0(float a) -{ - return (cubic_w1(a) / cubic_g0(a)) - 1.0f; -} -inline __device__ float cubic_h1(float a) -{ - return (cubic_w3(a) / cubic_g1(a)) + 1.0f; -} - -template -inline __device__ float interp_tricubic_nanovdb(S &s, float x, float y, float z) -{ - float px = floorf(x); - float py = floorf(y); - float pz = floorf(z); - float fx = x - px; - float fy = y - py; - float fz = z - pz; - - float g0x = cubic_g0(fx); - float g1x = cubic_g1(fx); - float g0y = cubic_g0(fy); - float g1y = cubic_g1(fy); - float g0z = cubic_g0(fz); - float g1z = cubic_g1(fz); - - float x0 = px + cubic_h0(fx); - float x1 = px + cubic_h1(fx); - float y0 = py + cubic_h0(fy); - float y1 = py + cubic_h1(fy); - float z0 = pz + cubic_h0(fz); - float z1 = pz + cubic_h1(fz); - - using namespace nanovdb; - - return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) + - g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) + - g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) + - g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1)))); -} - -inline __device__ float _LERP_(float t, float s1, float s2) -{ - //return (1 - t) * s1 + t * s2; - return fma(t, s2, fma(-t, s1, s1)); -} - -template -inline __device__ float nanoSampling(Acc& acc, nanovdb::Vec3f& point_indexd) { - - using GridTypeNVDB = nanovdb::NanoGrid; - - if constexpr(3 > Order) { - using Sampler = nanovdb::SampleFromVoxels; - return Sampler(acc)(point_indexd); - } - - if constexpr(3 == Order) { - nanovdb::SampleFromVoxels s(acc); - return interp_tricubic_nanovdb(s, point_indexd[0], point_indexd[1], point_indexd[2]); - } - - if constexpr(4 == Order) { - RadiancePRD* prd = getPRD(); - auto uuu = nanovdb::Vec3f(prd->rndf(), 
prd->rndf(), prd->rndf()); - uuu -= nanovdb::Vec3f(0.5f); - auto pick = nanovdb::RoundDown(point_indexd + uuu); - auto coord = nanovdb::Coord(pick[0], pick[1], pick[2]); - return acc.getValue(coord); - } - - return 0.0f; - - // auto point_floor = nanovdb::RoundDown(point_indexd); - // auto point_a = nanovdb::Coord(point_floor[0], point_floor[1], point_floor[2]); - // auto delta = point_indexd - point_floor; - - // auto value_000 = acc.getValue(point_a); - // auto value_100 = acc.getValue(point_a + nanovdb::Coord(1, 0, 0)); - // auto value_010 = acc.getValue(point_a + nanovdb::Coord(0, 1, 0)); - // auto value_110 = acc.getValue(point_a + nanovdb::Coord(1, 1, 0)); - // auto value_001 = acc.getValue(point_a + nanovdb::Coord(0, 0, 1)); - // auto value_101 = acc.getValue(point_a + nanovdb::Coord(1, 0, 1)); - // auto value_011 = acc.getValue(point_a + nanovdb::Coord(0, 1, 1)); - // auto value_111 = acc.getValue(point_a + nanovdb::Coord(1, 1, 1)); - - // auto value_00 = _LERP_(delta[0], value_000, value_100); - // auto value_10 = _LERP_(delta[0], value_010, value_110); - // auto value_01 = _LERP_(delta[0], value_001, value_101); - // auto value_11 = _LERP_(delta[0], value_011, value_111); - - // auto value_0 = _LERP_(delta[1], value_00, value_10); - // auto value_1 = _LERP_(delta[1], value_01, value_11); - - // return _LERP_(delta[2], value_0, value_1); -} - -struct VolumeIn { - vec3 pos; - - vec3 _local_pos_ = vec3(CUDART_NAN_F); - __inline__ __device__ vec3 localPosLazy() { - if ( isnan(_local_pos_.x) ) { - using GridTypeNVDB = GridTypeNVDB0; - - const HitGroupData* sbt_data = reinterpret_cast( optixGetSbtDataPointer() ); - - const auto grid_ptr = sbt_data->vdb_grids[0]; - const auto* _grid = reinterpret_cast(grid_ptr); - //const auto& _acc = _grid->tree().getAccessor(); - auto pos_indexed = reinterpret_cast(pos); - pos_indexed = _grid->worldToIndexF(pos_indexed); - - _local_pos_ = reinterpret_cast(pos_indexed); - } - return _local_pos_; - } - - vec3 _uniform_pos_ = 
vec3(CUDART_NAN_F); - __inline__ __device__ vec3 uniformPosLazy() { - if ( isnan(_uniform_pos_.x) ) { - using GridTypeNVDB = GridTypeNVDB0; - - const HitGroupData* sbt_data = reinterpret_cast( optixGetSbtDataPointer() ); - - const auto grid_ptr = sbt_data->vdb_grids[0]; - const auto* _grid = reinterpret_cast(grid_ptr); - - auto bbox = _grid->indexBBox(); - - nanovdb::Coord boundsMin( bbox.min() ); - nanovdb::Coord boundsMax( bbox.max() + nanovdb::Coord( 1 ) ); // extend by one unit - - vec3 min = { - static_cast( boundsMin[0] ), - static_cast( boundsMin[1] ), - static_cast( boundsMin[2] )}; - vec3 max = { - static_cast( boundsMax[0] ), - static_cast( boundsMax[1] ), - static_cast( boundsMax[2] )}; - - auto local_pos = localPosLazy(); - - _uniform_pos_ = (local_pos - min) / (max - min); - _uniform_pos_ = clamp(_uniform_pos_, vec3(0.0f), vec3(1.0f)); - - assert(_uniform_pos_.x >= 0); - assert(_uniform_pos_.y >= 0); - assert(_uniform_pos_.z >= 0); - } - return _uniform_pos_; - } -}; - -struct VolumeOut { - float max_density; - float density; - - float anisotropy; - vec3 emission; - vec3 albedo; -}; - -template -static __inline__ __device__ vec2 samplingVDB(const unsigned long long grid_ptr, vec3 att_pos) { - using GridTypeNVDB = nanovdb::NanoGrid; - - const auto* _grid = reinterpret_cast(grid_ptr); - const auto& _acc = _grid->tree().getAccessor(); - - auto pos_indexed = reinterpret_cast(att_pos); - - if constexpr(WorldSpace) - { - pos_indexed = _grid->worldToIndexF(pos_indexed); - } //_grid->tree().root().maximum(); - - return vec2 { nanoSampling(_acc, pos_indexed), _grid->tree().root().maximum() }; -} - -static __inline__ __device__ VolumeOut evalVolume(float4* uniforms, VolumeIn &attrs, RadiancePRD &prd) { - - auto att_pos = attrs.pos; - auto att_clr = vec3(0); - auto att_uv = vec3(0); - auto att_nrm = vec3(0); - auto att_tang = vec3(0); - - HitGroupData* sbt_data = (HitGroupData*)optixGetSbtDataPointer(); - auto zenotex = sbt_data->textures; - auto vdb_grids = 
sbt_data->vdb_grids; - auto vdb_max_v = sbt_data->vdb_max_v; - - //GENERATED_BEGIN_MARK - auto vol_sample_anisotropy = 0.0f; - auto vol_sample_density = 0.0f; - - vec3 vol_sample_emission = vec3(0.0f); - vec3 vol_sample_albedo = vec3(0.5f); - //GENERATED_END_MARK - -#if _USING_NANOVDB_ - - VolumeOut output; - - output.albedo = clamp(vol_sample_albedo, 0.0f, 1.0f); - output.anisotropy = clamp(vol_sample_anisotropy, -1.0f, 1.0f); //clamp(vol_sample_anisotropy, -0.99999f, 0.99999f); - - output.density = clamp(vol_sample_density, 0.0f, 1.0f); - output.emission = max(vol_sample_emission, vec3(0.0f)); - - return output; -#else - //USING 3D ARRAY - //USING 3D Noise -#endif -} - -// ---------------------------------------------------------------------------- -// Volume programs -// ---------------------------------------------------------------------------- - -inline __device__ void confine( const nanovdb::BBox &bbox, nanovdb::Vec3f &iVec ) -{ - // NanoVDB's voxels and tiles are formed from half-open intervals, i.e. - // voxel[0, 0, 0] spans the set [0, 1) x [0, 1) x [0, 1). To find a point's voxel, - // its coordinates are simply truncated to integer. Ray-box intersections yield - // pairs of points that, because of numerical errors, fall randomly on either side - // of the voxel boundaries. - // This confine method, given a point and a (integer-based/Coord-based) bounding - // box, moves points outside the bbox into it. That means coordinates at lower - // boundaries are snapped to the integer boundary, and in case of the point being - // close to an upper boundary, it is move one EPS below that bound and into the volume. 
- - // get the tighter box around active values - auto iMin = nanovdb::Vec3f( bbox.min() ); - auto iMax = nanovdb::Vec3f( bbox.max() ) + nanovdb::Vec3f( 1.0f ); - - // move the start and end points into the bbox - float eps = 1e-7f; - if( iVec[0] < iMin[0] ) iVec[0] = iMin[0]; - if( iVec[1] < iMin[1] ) iVec[1] = iMin[1]; - if( iVec[2] < iMin[2] ) iVec[2] = iMin[2]; - if( iVec[0] >= iMax[0] ) iVec[0] = iMax[0] - fmaxf( 1.0f, fabsf( iVec[0] ) ) * eps; - if( iVec[1] >= iMax[1] ) iVec[1] = iMax[1] - fmaxf( 1.0f, fabsf( iVec[1] ) ) * eps; - if( iVec[2] >= iMax[2] ) iVec[2] = iMax[2] - fmaxf( 1.0f, fabsf( iVec[2] ) ) * eps; -} - -inline __hostdev__ void confine( const nanovdb::BBox &bbox, nanovdb::Vec3f &iStart, nanovdb::Vec3f &iEnd ) -{ - confine( bbox, iStart ); - confine( bbox, iEnd ); -} - -template -inline __device__ float transmittanceHDDA( - const nanovdb::Vec3f& start, - const nanovdb::Vec3f& end, - AccT& acc, const float opacity ) -{ - - // transmittance along a ray through the volume is computed by - // taking the negative exponential of volume's density integrated - // along the ray. - float transmittance = 1.f; - auto dir = end - start; - auto len = dir.length(); - nanovdb::Ray ray( start, dir / len, 0.0f, len ); - nanovdb::Coord ijk = nanovdb::RoundDown( ray.start() ); // first hit of bbox - - // Use NanoVDB's HDDA line digitization for fast integration. - // This algorithm (http://www.museth.org/Ken/Publications_files/Museth_SIG14.pdf) - // can skip over sparse parts of the data structure. 
- // - nanovdb::HDDA > hdda( ray, acc.getDim( ijk, ray ) ); - - float t = 0.0f; - float density = acc.getValue( ijk ) * opacity; - while( hdda.step()) - { - float dt = hdda.time() - t; // compute length of ray-segment intersecting current voxel/tile - transmittance *= expf( -density * dt ); - t = hdda.time(); - ijk = hdda.voxel(); - - density = acc.getValue( ijk ) * opacity; - hdda.update( ray, acc.getDim( ijk, ray ) ); // if necessary adjust DDA step size - } - - return transmittance; -} extern "C" __global__ void __intersection__volume() { @@ -428,13 +93,13 @@ extern "C" __global__ void __closesthit__radiance_volume() float3 emitting = make_float3(0.0); float3 scattering = make_float3(1.0); - float sigma_t = _vol_extinction; + float sigma_t = sbt_data->vol_extinction; float v_density = 0.0; VolumeOut vol_out; auto new_dir = ray_dir; - auto level = _vol_depth; + auto level = sbt_data->vol_depth; auto step_scale = 1.0f/sigma_t; while(--level > 0) { @@ -465,24 +130,14 @@ extern "C" __global__ void __closesthit__radiance_volume() new_orig = ray_orig + (t0+t_ele) * ray_dir; - VolumeIn vol_in { new_orig }; - - vol_out = evalVolume(sbt_data->uniforms, vol_in, *prd); + VolumeIn vol_in { new_orig, sigma_t, &prd->seed, reinterpret_cast(sbt_data) }; + + vol_out = optixDirectCall( sbt_data->dc_index, sbt_data->uniforms, vol_in); v_density = vol_out.density; + emitting += vol_out.emission; + + step_scale = fminf(step_scale, vol_out.step_scale) ; - if constexpr(VolumeEmissionScaler == VolumeEmissionScalerType::Raw) { - emitting += vol_out.emission; - } else if constexpr(VolumeEmissionScaler == VolumeEmissionScalerType::Density) { - emitting += vol_out.density * vol_out.emission; - } else if constexpr(VolumeEmissionScaler == VolumeEmissionScalerType::Absorption) { - - float sigma_a = sigma_t * vol_out.density * average(1.0f - vol_out.albedo); - sigma_a = fmaxf(sigma_a, 0.0f); - auto tmp = vol_out.emission * sigma_a; - step_scale = 1.0f / fmaxf(sigma_t, average(tmp)); - emitting 
+= tmp / sigma_t; - } - if (prd->rndf() > v_density) { // null scattering v_density = 0.0f; continue; } @@ -557,9 +212,9 @@ extern "C" __global__ void __anyhit__occlusion_volume() float3 test_point = ray_orig; float3 transmittance = make_float3(1.0f); - const float sigma_t = _vol_extinction; + const float sigma_t = sbt_data->vol_extinction; - auto level = _vol_depth; + auto level = sbt_data->vol_depth; while(--level > 0) { auto prob = prd->rndf(); @@ -571,8 +226,8 @@ extern "C" __global__ void __anyhit__occlusion_volume() break; } // over shoot, outside of volume - VolumeIn vol_in { test_point }; - VolumeOut vol_out = evalVolume(sbt_data->uniforms, vol_in, *prd); + VolumeIn vol_in { test_point, sigma_t, &prd->seed, reinterpret_cast(sbt_data) }; + VolumeOut vol_out = optixDirectCall( sbt_data->dc_index, sbt_data->uniforms, vol_in ); const auto v_density = vol_out.density; diff --git a/zenovis/xinxinoptix/volume.h b/zenovis/xinxinoptix/volume.h index 7988336e1d..420f705046 100644 --- a/zenovis/xinxinoptix/volume.h +++ b/zenovis/xinxinoptix/volume.h @@ -10,6 +10,25 @@ namespace nanovdb { using Fp32 = float; }; +struct VolumeIn { + float3 pos; + float sigma_t; + uint32_t* seed; + + unsigned long long sbt_ptr; +}; + +struct VolumeOut { + float step_scale=__FLT_MAX__; + + float max_density; + float density; + + float anisotropy; + float3 emission; + float3 albedo; +}; + namespace pbrt { struct HenyeyGreenstein { diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h index 340512f4ce..1dfe5a30f7 100644 --- a/zenovis/xinxinoptix/xinxinoptixapi.h +++ b/zenovis/xinxinoptix/xinxinoptixapi.h @@ -18,10 +18,12 @@ enum ShaderMaker { struct ShaderPrepared { ShaderMaker mark; std::string matid; - std::string source; - std::vector tex_names; + std::string filename; - std::shared_ptr fallback; + std::string callable; + std::string parameters; + + std::vector tex_names; }; namespace xinxinoptix { From 1567f6feaa28faa6ddf715fe39aea6e5804711ad Mon Sep 
17 00:00:00 2001 From: iaomw Date: Tue, 26 Dec 2023 16:09:08 +0800 Subject: [PATCH 4/4] Cuda buildin half type --- zenovis/xinxinoptix/optixPathTracer.cpp | 13 ++++++++++--- zenovis/xinxinoptix/zxxglslvec.h | 8 ++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 839c9409a2..3fa3e6b8c5 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -169,6 +169,8 @@ typedef Record CallablesRecord; //uint32_t v1, v2, v3, pad; //}; +#include +// #include //struct Instance //{ @@ -217,15 +219,20 @@ ushort2 halfNormal(float4 in) #ifdef USE_SHORT ushort3 toHalfColor(float4 in) { + return make_ushort3((unsigned short)(in.x*65536.0f), (unsigned short)(in.y*65536.0f), (unsigned short)(in.z*65536.0f)); } ushort3 toHalf(float4 in) { - return make_ushort3((unsigned short)(in.x*65536.0f), - (unsigned short)(in.y*65536.0f), - (unsigned short)(in.z*65536.0f)); + half hx = __float2half(in.x); + half hy = __float2half(in.y); + half hz = __float2half(in.z); + + return make_ushort3(*(unsigned short*)&(hx), + *(unsigned short*)&(hy), + *(unsigned short*)&(hz)); } ushort3 halfNormal(float4 in) diff --git a/zenovis/xinxinoptix/zxxglslvec.h b/zenovis/xinxinoptix/zxxglslvec.h index 7071b10f0d..8d34014750 100644 --- a/zenovis/xinxinoptix/zxxglslvec.h +++ b/zenovis/xinxinoptix/zxxglslvec.h @@ -1,5 +1,6 @@ #pragma once +#include #include __forceinline__ __device__ float to_radians(float degrees) { @@ -1344,8 +1345,11 @@ __forceinline__ __device__ float3 decodeNormal(uchar3 c) __forceinline__ __device__ float3 decodeColor(ushort3 c) { - vec3 cout = vec3((float)(c.x), (float)(c.y), (float)(c.z)) / 65536.0f; - return make_float3(cout.x, cout.y, cout.z); + half& hx = reinterpret_cast(c.x); + half& hy = reinterpret_cast(c.y); + half& hz = reinterpret_cast(c.z); + + return { __half2float(hx), __half2float(hy), __half2float(hz) }; } __forceinline__ 
__device__ float3 decodeNormal(ushort3 c) {