From e800207b75df6fba4558e20e06961979afa2e51b Mon Sep 17 00:00:00 2001 From: zhxx1987 Date: Fri, 15 Dec 2023 23:23:55 +0800 Subject: [PATCH] reduce mem requirement --- zenovis/xinxinoptix/DeflMatShader.cu | 48 +++++++++++++-------------- zenovis/xinxinoptix/optixPathTracer.h | 15 ++++++++- zenovis/xinxinoptix/zxxglslvec.h | 30 +++++++++++++++++ 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index 39662f612a..8a431e708c 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -253,11 +253,11 @@ extern "C" __global__ void __anyhit__shadow_cutout() /* MODMA */ float2 barys = optixGetTriangleBarycentrics(); - float3 n0 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+0 ]) ); + float3 n0 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+0 ]) ); n0 = dot(n0, N_Local)>0.8f?n0:N_Local; - float3 n1 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+1 ]) ); + float3 n1 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+1 ]) ); n1 = dot(n1, N_Local)>0.8f?n1:N_Local; - float3 n2 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+2 ]) ); + float3 n2 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+2 ]) ); n2 = dot(n2, N_Local)>0.8f?n2:N_Local; N_Local = normalize(interp(barys, n0, n1, n2)); @@ -273,15 +273,15 @@ extern "C" __global__ void __anyhit__shadow_cutout() attrs.pos = P; attrs.nrm = N; - const float3& uv0 = *(float3*)&( rt_data->uv[ vert_idx_offset+0 ] ); - const float3& uv1 = *(float3*)&( rt_data->uv[ vert_idx_offset+1 ] ); - const float3& uv2 = *(float3*)&( rt_data->uv[ vert_idx_offset+2 ] ); - const float3& clr0 = *(float3*)&( rt_data->clr[ vert_idx_offset+0 ] ); - const float3& clr1 = *(float3*)&( rt_data->clr[ vert_idx_offset+1 ] ); - const float3& clr2 = *(float3*)&( rt_data->clr[ vert_idx_offset+2 ] ); - const float3& tan0 = *(float3*)&( rt_data->tan[ vert_idx_offset+0 ] ); - const float3& tan1 = 
*(float3*)&( rt_data->tan[ vert_idx_offset+1 ] ); - const float3& tan2 = *(float3*)&( rt_data->tan[ vert_idx_offset+2 ] ); + const float3& uv0 = decodeColor( rt_data->uv[ vert_idx_offset+0 ] ); + const float3& uv1 = decodeColor( rt_data->uv[ vert_idx_offset+1 ] ); + const float3& uv2 = decodeColor( rt_data->uv[ vert_idx_offset+2 ] ); + const float3& clr0 = decodeColor( rt_data->clr[ vert_idx_offset+0 ] ); + const float3& clr1 = decodeColor( rt_data->clr[ vert_idx_offset+1 ] ); + const float3& clr2 = decodeColor( rt_data->clr[ vert_idx_offset+2 ] ); + const float3& tan0 = decodeNormal( rt_data->tan[ vert_idx_offset+0 ] ); + const float3& tan1 = decodeNormal( rt_data->tan[ vert_idx_offset+1 ] ); + const float3& tan2 = decodeNormal( rt_data->tan[ vert_idx_offset+2 ] ); attrs.uv = interp(barys, uv0, uv1, uv2);//todo later attrs.clr = interp(barys, clr0, clr1, clr2); @@ -506,15 +506,15 @@ extern "C" __global__ void __closesthit__radiance() float3 N = N_World; attrs.nrm = N; - const float3& uv0 = *(float3*)&( rt_data->uv[ vert_idx_offset+0 ] ); - const float3& uv1 = *(float3*)&( rt_data->uv[ vert_idx_offset+1 ] ); - const float3& uv2 = *(float3*)&( rt_data->uv[ vert_idx_offset+2 ] ); - const float3& clr0 = *(float3*)&( rt_data->clr[ vert_idx_offset+0 ] ); - const float3& clr1 = *(float3*)&( rt_data->clr[ vert_idx_offset+1 ] ); - const float3& clr2 = *(float3*)&( rt_data->clr[ vert_idx_offset+2 ] ); - const float3& tan0 = *(float3*)&( rt_data->tan[ vert_idx_offset+0 ] ); - const float3& tan1 = *(float3*)&( rt_data->tan[ vert_idx_offset+1 ] ); - const float3& tan2 = *(float3*)&( rt_data->tan[ vert_idx_offset+2 ] ); + const float3& uv0 = decodeColor( rt_data->uv[ vert_idx_offset+0 ] ); + const float3& uv1 = decodeColor( rt_data->uv[ vert_idx_offset+1 ] ); + const float3& uv2 = decodeColor( rt_data->uv[ vert_idx_offset+2 ] ); + const float3& clr0 = decodeColor( rt_data->clr[ vert_idx_offset+0 ] ); + const float3& clr1 = decodeColor( rt_data->clr[ vert_idx_offset+1 ] ); + 
const float3& clr2 = decodeColor( rt_data->clr[ vert_idx_offset+2 ] ); + const float3& tan0 = decodeNormal( rt_data->tan[ vert_idx_offset+0 ] ); + const float3& tan1 = decodeNormal( rt_data->tan[ vert_idx_offset+1 ] ); + const float3& tan2 = decodeNormal( rt_data->tan[ vert_idx_offset+2 ] ); attrs.uv = interp(barys, uv0, uv1, uv2);//todo later attrs.clr = interp(barys, clr0, clr1, clr2); @@ -540,13 +540,13 @@ extern "C" __global__ void __closesthit__radiance() #else - float3 n0 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+0 ]) ); + float3 n0 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+0 ]) ); n0 = dot(n0, N_Local)>(1-mats.smoothness)?n0:N_Local; - float3 n1 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+1 ]) ); + float3 n1 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+1 ]) ); n1 = dot(n1, N_Local)>(1-mats.smoothness)?n1:N_Local; - float3 n2 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+2 ]) ); + float3 n2 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+2 ]) ); n2 = dot(n2, N_Local)>(1-mats.smoothness)?n2:N_Local; N_Local = normalize(interp(barys, n0, n1, n2)); diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index 04b8e69488..1bcb138fcc 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -1,5 +1,5 @@ #pragma once - +#define USE_SHORT 1 #include #include @@ -253,13 +253,26 @@ struct MissData { float4 bg_color; }; + struct HitGroupData { //float4* vertices; +#ifdef USE_SHORT_COMPACT /* The macros below select the element type of the uv/nrm/clr/tan pointers: ushort2, ushort3 or float4. */ + ushort2* uv; + ushort2* nrm; + ushort2* clr; + ushort2* tan; +#elif defined(USE_SHORT) /* Spelled as "#elif defined" because "#elifdef" is a C++23 directive; a preprocessor that does not know it skips it inside this inactive group and silently falls through to the float4 branch. */ + ushort3* uv; + ushort3* nrm; + ushort3* clr; + ushort3* tan; +#else float4* uv; float4* nrm; float4* clr; float4* tan; +#endif unsigned short* lightMark; uint32_t* auxOffset; diff --git a/zenovis/xinxinoptix/zxxglslvec.h b/zenovis/xinxinoptix/zxxglslvec.h index 3c59ff024d..7071b10f0d 100644 --- a/zenovis/xinxinoptix/zxxglslvec.h +++ 
b/zenovis/xinxinoptix/zxxglslvec.h @@ -1329,4 +1329,34 @@ __forceinline__ __device__ vec2 shuffled_scrambled_sobol_2d(unsigned int index, p.x = nested_uniform_scramble(p.x, hash_combine(seed, 0u)); p.y = nested_uniform_scramble(p.y, hash_combine(seed, 1u)); return vec2(p.x, p.y)*exp2(-32.); +} + +__forceinline__ __device__ float3 decodeColor(uchar3 c) /* Returns the three unsigned char channels of c, each divided by 255, as a float3. */ +{ + vec3 cout = vec3((float)(c.x), (float)(c.y), (float)(c.z)) / 255.0f; + return make_float3(cout.x, cout.y, cout.z); +} +__forceinline__ __device__ float3 decodeNormal(uchar3 c) /* Returns each unsigned char channel x of c mapped to x/255*2-1, as a float3. */ +{ + vec3 cout = vec3((float)(c.x), (float)(c.y), (float)(c.z)) / 255.0f * 2.0f - 1.0f; /* float literal, matching the other overloads; the original divided by the double literal 255.0 */ + return make_float3(cout.x, cout.y, cout.z); +} + +__forceinline__ __device__ float3 decodeColor(ushort3 c) /* Returns the three unsigned short channels of c, each divided by 65536, as a float3. NOTE(review): the divisor is 65536 here but 255 (the type maximum) in the uchar3 overload, so a channel of 65535 decodes to slightly below 1; confirm against the code that writes these buffers. */ +{ + vec3 cout = vec3((float)(c.x), (float)(c.y), (float)(c.z)) / 65536.0f; + return make_float3(cout.x, cout.y, cout.z); +} +__forceinline__ __device__ float3 decodeNormal(ushort3 c) /* Returns each unsigned short channel x of c mapped to x/65536*2-1, as a float3; a channel of 32768 decodes to exactly 0. */ +{ + vec3 cout = vec3((float)(c.x), (float)(c.y), (float)(c.z)) / 65536.0f * 2.0f - 1.0f; + return make_float3(cout.x, cout.y, cout.z); +} +__forceinline__ __device__ float3 decodeColor(float4 c) /* Pass-through overload: returns (c.x, c.y, c.z) unchanged and drops c.w. */ +{ + return make_float3(c.x, c.y, c.z); +} +__forceinline__ __device__ float3 decodeNormal(float4 c) /* Pass-through overload: returns (c.x, c.y, c.z) with no remapping and drops c.w. */ +{ + return make_float3(c.x, c.y, c.z); } \ No newline at end of file