Skip to content

Commit

Permalink
reduce mem requirement
Browse files Browse the repository at this point in the history
  • Loading branch information
zhxx1987 committed Dec 15, 2023
1 parent ab04fa2 commit e800207
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 25 deletions.
48 changes: 24 additions & 24 deletions zenovis/xinxinoptix/DeflMatShader.cu
Original file line number Diff line number Diff line change
Expand Up @@ -253,11 +253,11 @@ extern "C" __global__ void __anyhit__shadow_cutout()
/* MODMA */
float2 barys = optixGetTriangleBarycentrics();

float3 n0 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+0 ]) );
float3 n0 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+0 ]) );
n0 = dot(n0, N_Local)>0.8f?n0:N_Local;
float3 n1 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+1 ]) );
float3 n1 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+1 ]) );
n1 = dot(n1, N_Local)>0.8f?n1:N_Local;
float3 n2 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+2 ]) );
float3 n2 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+2 ]) );
n2 = dot(n2, N_Local)>0.8f?n2:N_Local;

N_Local = normalize(interp(barys, n0, n1, n2));
Expand All @@ -273,15 +273,15 @@ extern "C" __global__ void __anyhit__shadow_cutout()
attrs.pos = P;
attrs.nrm = N;

const float3& uv0 = *(float3*)&( rt_data->uv[ vert_idx_offset+0 ] );
const float3& uv1 = *(float3*)&( rt_data->uv[ vert_idx_offset+1 ] );
const float3& uv2 = *(float3*)&( rt_data->uv[ vert_idx_offset+2 ] );
const float3& clr0 = *(float3*)&( rt_data->clr[ vert_idx_offset+0 ] );
const float3& clr1 = *(float3*)&( rt_data->clr[ vert_idx_offset+1 ] );
const float3& clr2 = *(float3*)&( rt_data->clr[ vert_idx_offset+2 ] );
const float3& tan0 = *(float3*)&( rt_data->tan[ vert_idx_offset+0 ] );
const float3& tan1 = *(float3*)&( rt_data->tan[ vert_idx_offset+1 ] );
const float3& tan2 = *(float3*)&( rt_data->tan[ vert_idx_offset+2 ] );
const float3& uv0 = decodeColor( rt_data->uv[ vert_idx_offset+0 ] );
const float3& uv1 = decodeColor( rt_data->uv[ vert_idx_offset+1 ] );
const float3& uv2 = decodeColor( rt_data->uv[ vert_idx_offset+2 ] );
const float3& clr0 = decodeColor( rt_data->clr[ vert_idx_offset+0 ] );
const float3& clr1 = decodeColor( rt_data->clr[ vert_idx_offset+1 ] );
const float3& clr2 = decodeColor( rt_data->clr[ vert_idx_offset+2 ] );
const float3& tan0 = decodeNormal( rt_data->tan[ vert_idx_offset+0 ] );
const float3& tan1 = decodeNormal( rt_data->tan[ vert_idx_offset+1 ] );
const float3& tan2 = decodeNormal( rt_data->tan[ vert_idx_offset+2 ] );

attrs.uv = interp(barys, uv0, uv1, uv2);//todo later
attrs.clr = interp(barys, clr0, clr1, clr2);
Expand Down Expand Up @@ -506,15 +506,15 @@ extern "C" __global__ void __closesthit__radiance()
float3 N = N_World;
attrs.nrm = N;

const float3& uv0 = *(float3*)&( rt_data->uv[ vert_idx_offset+0 ] );
const float3& uv1 = *(float3*)&( rt_data->uv[ vert_idx_offset+1 ] );
const float3& uv2 = *(float3*)&( rt_data->uv[ vert_idx_offset+2 ] );
const float3& clr0 = *(float3*)&( rt_data->clr[ vert_idx_offset+0 ] );
const float3& clr1 = *(float3*)&( rt_data->clr[ vert_idx_offset+1 ] );
const float3& clr2 = *(float3*)&( rt_data->clr[ vert_idx_offset+2 ] );
const float3& tan0 = *(float3*)&( rt_data->tan[ vert_idx_offset+0 ] );
const float3& tan1 = *(float3*)&( rt_data->tan[ vert_idx_offset+1 ] );
const float3& tan2 = *(float3*)&( rt_data->tan[ vert_idx_offset+2 ] );
const float3& uv0 = decodeColor( rt_data->uv[ vert_idx_offset+0 ] );
const float3& uv1 = decodeColor( rt_data->uv[ vert_idx_offset+1 ] );
const float3& uv2 = decodeColor( rt_data->uv[ vert_idx_offset+2 ] );
const float3& clr0 = decodeColor( rt_data->clr[ vert_idx_offset+0 ] );
const float3& clr1 = decodeColor( rt_data->clr[ vert_idx_offset+1 ] );
const float3& clr2 = decodeColor( rt_data->clr[ vert_idx_offset+2 ] );
const float3& tan0 = decodeNormal( rt_data->tan[ vert_idx_offset+0 ] );
const float3& tan1 = decodeNormal( rt_data->tan[ vert_idx_offset+1 ] );
const float3& tan2 = decodeNormal( rt_data->tan[ vert_idx_offset+2 ] );

attrs.uv = interp(barys, uv0, uv1, uv2);//todo later
attrs.clr = interp(barys, clr0, clr1, clr2);
Expand All @@ -540,13 +540,13 @@ extern "C" __global__ void __closesthit__radiance()

#else

float3 n0 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+0 ]) );
float3 n0 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+0 ]) );
n0 = dot(n0, N_Local)>(1-mats.smoothness)?n0:N_Local;

float3 n1 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+1 ]) );
float3 n1 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+1 ]) );
n1 = dot(n1, N_Local)>(1-mats.smoothness)?n1:N_Local;

float3 n2 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+2 ]) );
float3 n2 = normalize( decodeNormal(rt_data->nrm[ vert_idx_offset+2 ]) );
n2 = dot(n2, N_Local)>(1-mats.smoothness)?n2:N_Local;

N_Local = normalize(interp(barys, n0, n1, n2));
Expand Down
15 changes: 14 additions & 1 deletion zenovis/xinxinoptix/optixPathTracer.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once

#define USE_SHORT 1
#include <optix.h>
#include <Shape.h>

Expand Down Expand Up @@ -253,13 +253,26 @@ struct MissData
{
float4 bg_color;
};

struct HitGroupData
{
//float4* vertices;
#ifdef USE_SHORT_COMPACT
ushort2* uv;
ushort2* nrm;
ushort2* clr;
ushort2* tan;
#elifdef USE_SHORT
ushort3* uv;
ushort3* nrm;
ushort3* clr;
ushort3* tan;
#else
float4* uv;
float4* nrm;
float4* clr;
float4* tan;
#endif
unsigned short* lightMark;
uint32_t* auxOffset;

Expand Down
30 changes: 30 additions & 0 deletions zenovis/xinxinoptix/zxxglslvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -1329,4 +1329,34 @@ __forceinline__ __device__ vec2 shuffled_scrambled_sobol_2d(unsigned int index,
p.x = nested_uniform_scramble(p.x, hash_combine(seed, 0u));
p.y = nested_uniform_scramble(p.y, hash_combine(seed, 1u));
return vec2(p.x, p.y)*exp2(-32.);
}

// Unpacks a colour stored with one unsigned byte per channel into a float3.
// Each channel is widened to float and the resulting vector is divided by
// 255.0f, so a stored 0 yields 0.0f and a stored 255 yields 1.0f (this relies
// on vec3's operator/ being a component-wise scalar divide; vec3 is defined
// elsewhere in this header).
__forceinline__ __device__ float3 decodeColor(uchar3 c)
{
    vec3 unit = vec3(static_cast<float>(c.x),
                     static_cast<float>(c.y),
                     static_cast<float>(c.z)) / 255.0f;
    return make_float3(unit.x, unit.y, unit.z);
}
// Unpacks a direction stored with one unsigned byte per channel into a float3.
// Each channel is widened to float, scaled by 1/255, then remapped with
// "* 2 - 1", so a stored 0 yields -1.0f and a stored 255 yields +1.0f (this
// relies on vec3's arithmetic operators being component-wise; vec3 is defined
// elsewhere in this header).
// The result is NOT re-normalised here; callers that need unit length must
// normalise it themselves.
__forceinline__ __device__ float3 decodeNormal(uchar3 c)
{
    // All literals carry the 'f' suffix so the arithmetic stays in single
    // precision, matching decodeColor(uchar3) and avoiding a double constant
    // in device code.
    vec3 signedUnit = vec3(static_cast<float>(c.x),
                           static_cast<float>(c.y),
                           static_cast<float>(c.z)) / 255.0f * 2.0f - 1.0f;
    return make_float3(signedUnit.x, signedUnit.y, signedUnit.z);
}

// Unpacks a colour stored with one unsigned 16-bit value per channel into a
// float3. Each channel is widened to float and the vector is divided by
// 65536.0f, so a stored 0 yields 0.0f and the largest 16-bit value (65535)
// yields slightly less than 1.0f.
// NOTE(review): the divisor is 65536, not 65535 -- confirm that the host-side
// encoder uses the same scale, otherwise full-intensity values cannot
// round-trip exactly.
__forceinline__ __device__ float3 decodeColor(ushort3 c)
{
    vec3 unit = vec3(static_cast<float>(c.x),
                     static_cast<float>(c.y),
                     static_cast<float>(c.z)) / 65536.0f;
    return make_float3(unit.x, unit.y, unit.z);
}
// Unpacks a direction stored with one unsigned 16-bit value per channel into
// a float3. Each channel is widened to float, divided by 65536.0f, then
// remapped with "* 2 - 1", so a stored 0 yields -1.0f and the largest 16-bit
// value (65535) yields slightly less than +1.0f.
// The result is NOT re-normalised here.
// NOTE(review): the divisor is 65536, not 65535 -- confirm that the host-side
// encoder uses the same scale.
__forceinline__ __device__ float3 decodeNormal(ushort3 c)
{
    vec3 signedUnit = vec3(static_cast<float>(c.x),
                           static_cast<float>(c.y),
                           static_cast<float>(c.z)) / 65536.0f * 2.0f - 1.0f;
    return make_float3(signedUnit.x, signedUnit.y, signedUnit.z);
}
// Pass-through overload for attributes that are already stored as float4:
// copies x, y and z unchanged into a float3 and discards w. It exists so
// call sites can use decodeColor() regardless of the storage type.
__forceinline__ __device__ float3 decodeColor(float4 c)
{
    float3 rgb = make_float3(c.x, c.y, c.z);
    return rgb;
}
// Pass-through overload for directions that are already stored as float4:
// copies x, y and z unchanged into a float3 and discards w. No remapping or
// normalisation is applied. It exists so call sites can use decodeNormal()
// regardless of the storage type.
__forceinline__ __device__ float3 decodeNormal(float4 c)
{
    float3 xyz = make_float3(c.x, c.y, c.z);
    return xyz;
}

0 comments on commit e800207

Please sign in to comment.