diff --git a/res/gamedata/shaders/gl/accum_sun.ps b/res/gamedata/shaders/gl/accum_sun.ps new file mode 100644 index 00000000000..8eabbe1b7f5 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun.vs b/res/gamedata/shaders/gl/accum_sun.vs new file mode 100644 index 00000000000..c926f7fd9e5 --- /dev/null +++ b/res/gamedata/shaders/gl/accum_sun.vs @@ -0,0 +1,21 @@ +#include "common.h" +#include "iostructs\v_volume.h" + +////////////////////////////////////////////////////////////////////////////////////////// +uniform float4x4 m_texgen; +#ifdef USE_SJITTER +uniform float4x4 m_texgen_J; +#endif + +////////////////////////////////////////////////////////////////////////////////////////// +// Vertex +v2p_volume _main ( float4 P ) +{ + v2p_volume O; + O.hpos = mul( m_WVP, P ); + O.tc = mul( m_texgen, P ); +#ifdef USE_SJITTER + O.tcJ = mul( m_texgen_J, P ); +#endif + return O; +} diff --git a/res/gamedata/shaders/gl/accum_sun_far.ps b/res/gamedata/shaders/gl/accum_sun_far.ps new file mode 100644 index 00000000000..d15072ccb93 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_far.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_far_msaa.ps b/res/gamedata/shaders/gl/accum_sun_far_msaa.ps new file mode 100644 index 00000000000..a074523cf85 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_far_msaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_far_nomsaa.ps b/res/gamedata/shaders/gl/accum_sun_far_nomsaa.ps new file mode 100644 index 00000000000..a074523cf85 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_far_nomsaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_mask.ps b/res/gamedata/shaders/gl/accum_sun_mask.ps new file mode 100644 index 00000000000..8537d22ed51 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_mask.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_mask_msaa.ps 
b/res/gamedata/shaders/gl/accum_sun_mask_msaa.ps new file mode 100644 index 00000000000..ba20fed3944 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_mask_msaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_mask_nomsaa.ps b/res/gamedata/shaders/gl/accum_sun_mask_nomsaa.ps new file mode 100644 index 00000000000..553eb1aa205 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_mask_nomsaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_msaa.ps b/res/gamedata/shaders/gl/accum_sun_msaa.ps new file mode 100644 index 00000000000..3d6fe0840a2 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_msaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_near.ps b/res/gamedata/shaders/gl/accum_sun_near.ps new file mode 100644 index 00000000000..e5ac11dcd12 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_near.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_near_msaa.ps b/res/gamedata/shaders/gl/accum_sun_near_msaa.ps new file mode 100644 index 00000000000..f2896ea3851 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_near_msaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_near_msaa_minmax.ps b/res/gamedata/shaders/gl/accum_sun_near_msaa_minmax.ps new file mode 100644 index 00000000000..b96feb73c41 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_near_msaa_minmax.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_near_msaa_nominmax.ps b/res/gamedata/shaders/gl/accum_sun_near_msaa_nominmax.ps new file mode 100644 index 00000000000..883bef6e289 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_near_msaa_nominmax.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_near_nomsaa.ps b/res/gamedata/shaders/gl/accum_sun_near_nomsaa.ps new file mode 100644 index 00000000000..18deb31d621 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_near_nomsaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_near_nomsaa_minmax.ps 
b/res/gamedata/shaders/gl/accum_sun_near_nomsaa_minmax.ps new file mode 100644 index 00000000000..be3fc8bf992 Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_near_nomsaa_minmax.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_near_nomsaa_nominmax.ps b/res/gamedata/shaders/gl/accum_sun_near_nomsaa_nominmax.ps new file mode 100644 index 00000000000..2c03c75be3e Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_near_nomsaa_nominmax.ps differ diff --git a/res/gamedata/shaders/gl/accum_sun_nomsaa.ps b/res/gamedata/shaders/gl/accum_sun_nomsaa.ps new file mode 100644 index 00000000000..f6daf0fec2f Binary files /dev/null and b/res/gamedata/shaders/gl/accum_sun_nomsaa.ps differ diff --git a/res/gamedata/shaders/gl/accum_volumetric.s b/res/gamedata/shaders/gl/accum_volumetric.s index 58636cc60a1..1aff0f243e7 100644 --- a/res/gamedata/shaders/gl/accum_volumetric.s +++ b/res/gamedata/shaders/gl/accum_volumetric.s @@ -6,6 +6,7 @@ function normal (shader, t_base, t_second, t_detail) -- : aref (true,0) : sorting (2, false) shader:sampler ("s_lmap") :texture (t_base): clamp() + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_noise") :texture ("fx\\fx_noise") : f_linear () end \ No newline at end of file diff --git a/res/gamedata/shaders/gl/accum_volumetric_nomsaa.s b/res/gamedata/shaders/gl/accum_volumetric_nomsaa.s index 58636cc60a1..1aff0f243e7 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_nomsaa.s +++ b/res/gamedata/shaders/gl/accum_volumetric_nomsaa.s @@ -6,6 +6,7 @@ function normal (shader, t_base, t_second, t_detail) -- : aref (true,0) : sorting (2, false) shader:sampler ("s_lmap") :texture (t_base): clamp() + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_noise") :texture ("fx\\fx_noise") : f_linear () end \ No newline at end of file 
diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun.s b/res/gamedata/shaders/gl/accum_volumetric_sun.s index 4f9ced30841..07f9a0f28d8 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_minmax.s b/res/gamedata/shaders/gl/accum_volumetric_sun_minmax.s index d687de6ed58..93c9a50d861 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_minmax.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_minmax.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa0.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa0.s index 1ea7410ff25..b505bc4c3b3 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa0.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa0.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. 
+ shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa1.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa1.s index 63148dac414..8238162a355 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa1.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa1.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa2.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa2.s index d4bd2c1d5f8..c6f75254b55 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa2.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa2.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. 
+ shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa3.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa3.s index f7bcba0a967..b3ba9b0336e 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa3.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa3.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa4.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa4.s index 75d50091cba..06ad0cafff3 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa4.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa4.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. 
+ shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa5.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa5.s index 66074584913..37995650780 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa5.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa5.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa6.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa6.s index 40bb6873dd9..b2c5fd9c37d 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa6.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa6.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. 
+ shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa7.s b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa7.s index 75a8ab33563..481f7a1141e 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_msaa7.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_msaa7.s @@ -5,6 +5,7 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa.s b/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa.s index 1efc2c6be9d..9abc1d7a5b9 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa.s @@ -5,8 +5,9 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. 
+ shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () - shader:sampler ("s_smap_minmax") :texture ("$user$smap_depth_minmax") : comp_less () + shader:sampler ("s_smap_minmax") :texture ("$user$smap_depth_minmax") shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () end \ No newline at end of file diff --git a/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa_minmax.s b/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa_minmax.s index 8ae4245dccf..cca304627c1 100644 --- a/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa_minmax.s +++ b/res/gamedata/shaders/gl/accum_volumetric_sun_nomsaa_minmax.s @@ -5,8 +5,9 @@ function normal (shader, t_base, t_second, t_detail) : blend (true,blend.one,blend.one) : sorting (2, false) -- TODO: DX10: Implement for near and far phase. + shader:sampler ("s_dmap") :texture ("$user$smap_depth") shader:sampler ("s_smap") :texture ("$user$smap_depth") : comp_less () - shader:sampler ("s_smap_minmax") :texture ("$user$smap_depth_minmax") : comp_less () + shader:sampler ("s_smap_minmax") :texture ("$user$smap_depth_minmax") shader:sampler ("s_position") :texture ("$user$position") shader:sampler ("jitter0") :texture ("$user$jitter_0") : f_none () end \ No newline at end of file diff --git a/res/gamedata/shaders/gl/common.h b/res/gamedata/shaders/gl/common.h index 80c70acf619..06307b09df4 100644 --- a/res/gamedata/shaders/gl/common.h +++ b/res/gamedata/shaders/gl/common.h @@ -3,12 +3,20 @@ #include "shared\common.h" +// TODO: OGL: Move to cbuffers +uniform half4 hemi_cube_pos_faces; +uniform half4 hemi_cube_neg_faces; +uniform half4 L_material; // 0,0,0,mid +uniform half4 Ldynamic_color; // dynamic light color (rgb1) - spot/point +uniform half4 Ldynamic_pos; // dynamic light pos+1/range(w) - spot/point +uniform half4 Ldynamic_dir; // dynamic light direction - sun + #include "common_defines.h" 
-//include "common_policies.h" +#include "common_policies.h" #include "common_iostructs.h" #include "common_samplers.h" //include "common_cbuffers.h" -//include "common_functions.h" +#include "common_functions.h" // #define USE_SUPER_SPECULAR diff --git a/res/gamedata/shaders/gl/common_functions.h b/res/gamedata/shaders/gl/common_functions.h new file mode 100644 index 00000000000..a96bbb2d705 --- /dev/null +++ b/res/gamedata/shaders/gl/common_functions.h @@ -0,0 +1,476 @@ +#ifndef common_functions_h_included +#define common_functions_h_included + +// contrast function +float Contrast(float Input, float ContrastPower) +{ + //piecewise contrast function + bool IsAboveHalf = Input > 0.5 ; + float ToRaise = saturate(2*(IsAboveHalf ? 1-Input : Input)); + float Output = 0.5*pow(ToRaise, ContrastPower); + Output = IsAboveHalf ? 1-Output : Output; + return Output; +} + +void tonemap( out float4 low, out float4 high, float3 rgb, float scale) +{ + rgb = rgb*scale; + + const float fWhiteIntensity = 1.7; + + const float fWhiteIntensitySQR = fWhiteIntensity*fWhiteIntensity; + +// low = (rgb/(rgb + 1)).xyzz; + low = ( (rgb*(1+rgb/fWhiteIntensitySQR)) / (rgb+1) ).xyzz; + + high = rgb.xyzz/def_hdr; // 8x dynamic range + +/* + rgb = rgb*scale; + + low = rgb.xyzz; + high = low/def_hdr; // 8x dynamic range +*/ +} + +float4 combine_bloom( float3 low, float4 high) +{ + return float4( low + high.rgb*high.a, 1.f ); +} + +float calc_fogging( float4 w_pos ) +{ + return dot(w_pos,fog_plane); +} + +float2 unpack_tc_base( float2 tc, float du, float dv ) +{ + return (tc.xy + float2 (du,dv))*(32.f/32768.f); //!Increase from 32bit to 64bit floating point +} + +float3 calc_sun_r1( float3 norm_w ) +{ + return L_sun_color*saturate(dot((norm_w),-L_sun_dir_w)); +} + +float3 calc_model_hemi_r1( float3 norm_w ) +{ + return max(0,norm_w.y)*L_hemi_color.rgb; +} + +float3 calc_model_lq_lighting( float3 norm_w ) +{ + return L_material.x*calc_model_hemi_r1(norm_w) + L_ambient.rgb + 
L_material.y*calc_sun_r1(norm_w); +} + +float3 unpack_normal( float3 v ) { return 2*v-1; } +float3 unpack_bx2( float3 v ) { return 2*v-1; } +float3 unpack_bx4( float3 v ) { return 4*v-2; } //!reduce the amount of stretching from 4*v-2 and increase precision +float2 unpack_tc_lmap( float2 tc ) { return tc*(1.f/32768.f); } // [-1 .. +1 ] +float4 unpack_color( float4 c ) { return c.bgra; } +float4 unpack_D3DCOLOR( float4 c ) { return c.bgra; } +float3 unpack_D3DCOLOR( float3 c ) { return c.bgr; } + +float3 p_hemi( float2 tc ) +{ +// float3 t_lmh = tex2D (s_hemi, tc); +// float3 t_lmh = s_hemi.Sample( smp_rtlinear, tc); +// return dot(t_lmh,1.f/4.f); + float4 t_lmh = tex2D (s_hemi, tc); + return float3(t_lmh.a); +} + +float get_hemi( float4 lmh) +{ + return lmh.a; +} + +float get_sun( float4 lmh) +{ + return lmh.g; +} + +float3 v_hemi(float3 n) +{ + return L_hemi_color.rgb*(.5f + .5f*n.y); +} + +float3 v_sun(float3 n) +{ + return L_sun_color*dot(n,-L_sun_dir_w); +} + +float3 calc_reflection( float3 pos_w, float3 norm_w ) +{ + return reflect(normalize(pos_w-eye_position), norm_w); +} + +#define USABLE_BIT_1 uint(0x00002000) +#define USABLE_BIT_2 uint(0x00004000) +#define USABLE_BIT_3 uint(0x00008000) +#define USABLE_BIT_4 uint(0x00010000) +#define USABLE_BIT_5 uint(0x00020000) +#define USABLE_BIT_6 uint(0x00040000) +#define USABLE_BIT_7 uint(0x00080000) +#define USABLE_BIT_8 uint(0x00100000) +#define USABLE_BIT_9 uint(0x00200000) +#define USABLE_BIT_10 uint(0x00400000) +#define USABLE_BIT_11 uint(0x00800000) // At least two of those four bit flags must be mutually exclusive (i.e. 
all 4 bits must not be set together) +#define USABLE_BIT_12 uint(0x01000000) // This is because setting 0x47800000 sets all 5 FP16 exponent bits to 1 which means infinity +#define USABLE_BIT_13 uint(0x02000000) // This will be translated to a +/-MAX_FLOAT in the FP16 render target (0xFBFF/0x7BFF), overwriting the +#define USABLE_BIT_14 uint(0x04000000) // mantissa bits where other bit flags are stored. +#define USABLE_BIT_15 uint(0x80000000) +#define MUST_BE_SET uint(0x40000000) // This flag *must* be stored in the floating-point representation of the bit flag to store + +/* +float2 gbuf_pack_normal( float3 norm ) +{ + float2 res; + + res = 0.5 * ( norm.xy + float2( 1, 1 ) ) ; + res.x *= ( norm.z < 0 ? -1.0 : 1.0 ); + + return res; +} + +float3 gbuf_unpack_normal( float2 norm ) +{ + float3 res; + + res.xy = ( 2.0 * abs( norm ) ) - float2(1,1); + + res.z = ( norm.x < 0 ? -1.0 : 1.0 ) * sqrt( abs( 1 - res.x * res.x - res.y * res.y ) ); + + return res; +} +*/ + +// Holger Gruen AMD - I change normal packing and unpacking to make sure N.z is accessible without ALU cost +// this help the HDAO compute shader to run more efficiently +float2 gbuf_pack_normal( float3 norm ) +{ + float2 res; + + res.x = norm.z; + res.y = 0.5f * ( norm.x + 1.0f ) ; + res.y *= ( norm.y < 0.0f ? -1.0f : 1.0f ); + + return res; +} + +float3 gbuf_unpack_normal( float2 norm ) +{ + float3 res; + + res.z = norm.x; + res.x = ( 2.0f * abs( norm.y ) ) - 1.0f; + res.y = ( norm.y < 0 ? 
-1.0 : 1.0 ) * sqrt( abs( 1 - res.x * res.x - res.z * res.z ) ); + + return res; +} + +float gbuf_pack_hemi_mtl( float hemi, float mtl ) +{ + uint packed_mtl = uint( ( mtl / 1.333333333 ) * 31.0 ); +// uint packed_hemi = ( MUST_BE_SET + ( uint( hemi * 255.0 ) << 13 ) + ( ( packed_mtl & uint( 31 ) ) << 21 ) ); + // Clamp hemi max value + uint packed_hemi = ( MUST_BE_SET + ( uint( saturate(hemi) * 255.9 ) << 13 ) + ( ( packed_mtl & uint( 31 ) ) << 21 ) ); + + if( ( packed_hemi & USABLE_BIT_13 ) == 0 ) + packed_hemi |= USABLE_BIT_14; + + if( ( packed_mtl & uint( 16 ) ) != 0 ) + packed_hemi |= USABLE_BIT_15; + + return asfloat( packed_hemi ); +} + +float gbuf_unpack_hemi( float mtl_hemi ) +{ +// return float( ( asuint( mtl_hemi ) >> 13 ) & uint(255) ) * (1.0/255.0); + return float( ( asuint( mtl_hemi ) >> 13 ) & uint(255) ) * (1.0/254.8); +} + +float gbuf_unpack_mtl( float mtl_hemi ) +{ + uint packed_mtl = asuint( mtl_hemi ); + uint packed_hemi = ( ( packed_mtl >> 21 ) & uint(15) ) + ( ( packed_mtl & USABLE_BIT_15 ) == 0 ? 
0 : 16 ); + return float( packed_hemi ) * (1.0/31.0) * 1.333333333; +} + +#ifndef EXTEND_F_DEFFER +f_deffer pack_gbuffer( float4 norm, float4 pos, float4 col ) +#else +f_deffer pack_gbuffer( float4 norm, float4 pos, float4 col, uint imask ) +#endif +{ + f_deffer res; + +#ifndef GBUFFER_OPTIMIZATION + res.position = pos; + res.Ne = norm; + res.C = col; +#else + res.position = float4( gbuf_pack_normal( norm.xyz ), pos.z, gbuf_pack_hemi_mtl( norm.w, pos.w ) ); // .xyz: GLSL does not truncate float4 to float3 implicitly + res.C = col; +#endif + +#ifdef EXTEND_F_DEFFER + res.mask = imask; +#endif + + return res; +} + +#ifdef GBUFFER_OPTIMIZATION +gbuffer_data gbuffer_load_data( float2 tc, float2 pos2d, int iSample ) +{ + gbuffer_data gbd; + + gbd.P = float3(0,0,0); + gbd.hemi = 0; + gbd.mtl = 0; + gbd.C = float3(0,0,0); // GLSL cannot assign a scalar to a vector + gbd.N = float3(0,0,0); + +#ifndef USE_MSAA + float4 P = tex2D( s_position, tc ); // same fetch form as s_diffuse below; GLSL samplers have no .Sample() method +#else + float4 P = texelFetch( s_position, int2( pos2d ), 0, iSample ); // same fetch form as s_diffuse below; GLSL samplers have no .Load() method +#endif + + // 3d view space pos reconstruction math + // center of the plane (0,0) or (0.5,0.5) at distance 1 is eyepoint(0,0,0) + lookat (assuming |lookat| ==1 + // left/right = (0,0,1) -/+ tan(fHorzFOV/2) * (1,0,0 ) + // top/bottom = (0,0,1) +/- tan(fVertFOV/2) * (0,1,0 ) + // lefttop = ( -tan(fHorzFOV/2), tan(fVertFOV/2), 1 ) + // righttop = ( tan(fHorzFOV/2), tan(fVertFOV/2), 1 ) + // leftbottom = ( -tan(fHorzFOV/2), -tan(fVertFOV/2), 1 ) + // rightbottom = ( tan(fHorzFOV/2), -tan(fVertFOV/2), 1 ) + gbd.P = float3( P.z * ( pos2d * pos_decompression_params.zw - pos_decompression_params.xy ), P.z ); + + // reconstruct N + gbd.N = gbuf_unpack_normal( P.xy ); + + // reconstruct material + gbd.mtl = gbuf_unpack_mtl( P.w ); + + // reconstruct hemi + gbd.hemi = gbuf_unpack_hemi( P.w ); + +#ifndef USE_MSAA + float4 C = tex2D( s_diffuse, tc ); +#else + float4 C = texelFetch( s_diffuse, int2( pos2d ), 0, iSample ); +#endif + + gbd.C = C.xyz; + gbd.gloss = C.w; + + return gbd; +} + +gbuffer_data gbuffer_load_data( float2 tc, float2 pos2d ) +{ + return gbuffer_load_data( tc, 
pos2d, 0 ); +} + +gbuffer_data gbuffer_load_data_offset( float2 tc, float2 OffsetTC, float2 pos2d ) +{ + float2 delta = ( ( OffsetTC - tc ) * pos_decompression_params2.xy ); + + return gbuffer_load_data( OffsetTC, pos2d + delta, 0 ); +} + +gbuffer_data gbuffer_load_data_offset( float2 tc, float2 OffsetTC, float2 pos2d, uint iSample ) +{ + float2 delta = ( ( OffsetTC - tc ) * pos_decompression_params2.xy ); + + return gbuffer_load_data( OffsetTC, pos2d + delta, iSample ); +} + +#else // GBUFFER_OPTIMIZATION +gbuffer_data gbuffer_load_data( float2 tc, uint iSample ) +{ + gbuffer_data gbd; + +#ifndef USE_MSAA + float4 P = tex2D( s_position, tc ); +#else + float4 P = texelFetch( s_position, int2( tc * pos_decompression_params2.xy ), 0, iSample ); +#endif + + gbd.P = P.xyz; + gbd.mtl = P.w; + +#ifndef USE_MSAA + float4 N = tex2D( s_normal, tc ); +#else + float4 N = texelFetch( s_normal, int2( tc * pos_decompression_params2.xy ), 0, iSample ); +#endif + + gbd.N = N.xyz; + gbd.hemi = N.w; + +#ifndef USE_MSAA + float4 C = tex2D( s_diffuse, tc ); +#else + float4 C = texelFetch( s_diffuse, int2( tc * pos_decompression_params2.xy ), 0, iSample ); +#endif + + + gbd.C = C.xyz; + gbd.gloss = C.w; + + return gbd; +} + +gbuffer_data gbuffer_load_data( float2 tc ) +{ + return gbuffer_load_data( tc, 0 ); +} + +gbuffer_data gbuffer_load_data_offset( float2 tc, float2 OffsetTC, uint iSample ) +{ + return gbuffer_load_data( OffsetTC, iSample ); +} + +#endif // GBUFFER_OPTIMIZATION + +////////////////////////////////////////////////////////////////////////// +// Aplha to coverage code +#if ( defined( MSAA_ALPHATEST_DX10_1_ATOC ) || defined( MSAA_ALPHATEST_DX10_1 ) ) + +#if MSAA_SAMPLES == 2 +uint alpha_to_coverage ( float alpha, float2 pos2d ) +{ + uint mask; + uint pos = uint(pos2d.x) | uint( pos2d.y); + if( alpha < 0.3333 ) + mask = 0; + else if( alpha < 0.6666 ) + mask = 1 << ( pos & 1 ); + else + mask = 3; + + return mask; +} +#endif + +#if MSAA_SAMPLES == 4 +uint alpha_to_coverage 
( float alpha, float2 pos2d ) +{ + uint mask; + + float off = float( ( uint(pos2d.x) | uint( pos2d.y) ) & 3 ); + alpha = saturate( alpha - off * ( ( 0.2 / 4.0 ) / 3.0 ) ); + if( alpha < 0.40 ) + { + if( alpha < 0.20 ) + mask = 0; + else if( alpha < 0.40 ) // only one bit set + mask = 1; + } + else + { + if( alpha < 0.60 ) // 2 bits set => 1100 0110 0011 1001 1010 0101 + { + mask = 3; + } + else if( alpha < 0.8 ) // 3 bits set => 1110 0111 1011 1101 + mask = 7; + else + mask = 0xf; + } + + return mask; +} +#endif + +#if MSAA_SAMPLES == 8 +uint alpha_to_coverage ( float alpha, float2 pos2d ) +{ + uint mask; + + float off = float( ( uint(pos2d.x) | uint( pos2d.y) ) & 3 ); + alpha = saturate( alpha - off * ( ( 0.1111 / 8.0 ) / 3.0 ) ); + if( alpha < 0.4444 ) + { + if( alpha < 0.2222 ) + { + if( alpha < 0.1111 ) + mask = 0; + else // only one bit set 0.2222 + mask = 1; + } + else + { + if( alpha < 0.3333 ) // 2 bits set0=> 10000001 + 11000000 .. 00000011 : 8 // 0.2222 + // set1=> 10100000 .. 00000101 + 10000010 + 01000001 : 8 + // set2=> 10010000 .. 00001001 + 10000100 + 01000010 + 00100001 : 8 + // set3=> 10001000 .. 00010001 + 10001000 + 01000100 + 00100010 + 00010001 : 8 + { + mask = 3; + } + else // 3 bits set0 => 11100000 .. 00000111 + 10000011 + 11000001 : 8 ? 0.4444 // 0.3333 + // set1 => 10110000 .. 00001011 + 10000101 + 11000010 + 01100001: 8 + // set2 => 11010000 .. 00001101 + 10000110 + 01000011 + 10100001: 8 + // set3 => 10011000 .. 00010011 + 10001001 + 11000100 + 01100010 + 00110001 : 8 + // set4 => 11001000 .. 00011001 + 10001100 + 01000110 + 00100011 + 10010001 : 8 + { + mask = 0x7; + } + } + } + else + { + if( alpha < 0.6666 ) + { + if( alpha < 0.5555 ) // 4 bits set0 => 11110000 .. 00001111 + 10000111 + 11000011 + 11100001 : 8 // 0.5555 + // set1 => 11011000 .. 00011011 + 10001101 + 11000110 + 01100011 + 10110001 : 8 + // set2 => 11001100 .. 
00110011 + 10011001 : 4 make 8 + // set3 => 11000110 + 01100011 + 10110001 + 11011000 + 01101100 + 00110110 + 00011011 + 10001101 : 8 + // set4 => 10111000 .. 00010111 + 10001011 + 11000101 + 11100010 + 01110001 : 8 + // set5 => 10011100 .. 00100111 + 10010011 + 11001001 + 11100100 + 01110010 + 00111001 : 8 + // set6 => 10101010 .. 01010101 : 2 make 8 + // set7 => 10110100 + 01011010 + 00101101 + 10010110 + 01001011 + 10100101 + 11010010 + 01101001 : 8 + // set8 => 10011010 + 01001101 + 10100110 + 01010011 + 10101001 + 11010100 + 01101010 + 00110101 : 8 + { + mask = 0xf; + } + else // 5 bits set0 => 11111000 01111100 00111110 00011111 10001111 11000111 11100011 11110001 : 8 // 0.6666 + // set1 => 10111100 : 8 + // set2 => 10011110 : 8 + // set3 => 11011100 : 8 + // set4 => 11001110 : 8 + // set5 => 11011010 : 8 + // set6 => 10110110 : 8 + { + mask = 0x1F; + } + } + else + { + if( alpha < 0.7777 ) // 6 bits set0 => 11111100 01111110 00111111 10011111 11001111 11100111 11110011 11111001 : 8 + // set1 => 10111110 : 8 + // set2 => 11011110 : 8 + { + mask = 0x3F; + } + else if( alpha < 0.8888 ) // 7 bits set0 => 11111110 :8 + { + mask = 0x7F; + } + else // all 8 bits set + mask = 0xFF; + } + } + + return mask; +} +#endif +#endif + + + +#endif // common_functions_h_included diff --git a/res/gamedata/shaders/gl/common_policies.h b/res/gamedata/shaders/gl/common_policies.h new file mode 100644 index 00000000000..bd77da3d37c --- /dev/null +++ b/res/gamedata/shaders/gl/common_policies.h @@ -0,0 +1,39 @@ +#ifndef common_policies_h_included +#define common_policies_h_included + +// Define default sample index for MSAA +#ifndef ISAMPLE +#define ISAMPLE 0 +#endif // ISAMPLE + +// redefine sample index +#ifdef MSAA_OPTIMIZATION +#undef ISAMPLE +#define ISAMPLE iSample +#endif // MSAA_OPTIMIZATION + +///////////////////////////////////////////////////////////////////////////// +// GLD_P - gbuffer_load_data +#ifdef GBUFFER_OPTIMIZATION + #define GLD_P( _tc, _pos2d, _iSample ) _tc, 
_pos2d, _iSample +#else // GBUFFER_OPTIMIZATION + #define GLD_P( _tc, _pos2d, _iSample ) _tc, _iSample +#endif // GBUFFER_OPTIMIZATION + +///////////////////////////////////////////////////////////////////////////// +// CS_P +#ifdef USE_MSAA +# ifdef GBUFFER_OPTIMIZATION +# define CS_P( _P, _N, _tc0, _tcJ, _pos2d, _iSample ) _P, _N, _tc0, _tcJ, _pos2d, _iSample +# else // GBUFFER_OPTIMIZATION +# define CS_P( _P, _N, _tc0, _tcJ, _pos2d, _iSample ) _P, _N, _tc0, _tcJ, _iSample +# endif // GBUFFER_OPTIMIZATION +#else +# ifdef GBUFFER_OPTIMIZATION +# define CS_P( _P, _N, _tc0, _tcJ, _pos2d, _iSample ) _P, _N, _tc0, _tcJ, _pos2d +# else // GBUFFER_OPTIMIZATION +# define CS_P( _P, _N, _tc0, _tcJ, _pos2d, _iSample ) _P, _N, _tc0, _tcJ +# endif +#endif + +#endif // common_policies_h_included \ No newline at end of file diff --git a/res/gamedata/shaders/gl/common_samplers.h b/res/gamedata/shaders/gl/common_samplers.h index 79038f75b62..8d470e6dea7 100644 --- a/res/gamedata/shaders/gl/common_samplers.h +++ b/res/gamedata/shaders/gl/common_samplers.h @@ -5,6 +5,7 @@ #define Texture3D uniform sampler3D #define Texture2DMS uniform sampler2DMS #define TextureCube uniform samplerCube​ +#define Texture2DShadow uniform sampler2DShadow ////////////////////////////////////////////////////////////////////////////////////////// // Geometry phase / deferring // diff --git a/res/gamedata/shaders/gl/gather.ps b/res/gamedata/shaders/gl/gather.ps new file mode 100644 index 00000000000..e808ee4d620 Binary files /dev/null and b/res/gamedata/shaders/gl/gather.ps differ diff --git a/res/gamedata/shaders/gl/iostructs/p_TL_sun.h b/res/gamedata/shaders/gl/iostructs/p_TL_sun.h new file mode 100644 index 00000000000..181a934c079 --- /dev/null +++ b/res/gamedata/shaders/gl/iostructs/p_TL_sun.h @@ -0,0 +1,47 @@ + +out vec4 SV_Target; +#ifdef MSAA_OPTIMIZATION +in int gl_SampleID; +#endif +#ifdef GBUFFER_OPTIMIZATION +in vec4 gl_FragCoord; +#endif + +layout(location = TEXCOORD0) in float2 p_TL_Tex0 ; 
// TEXCOORD0;
+layout(location = COLOR) in float4 p_TL_Color ; // COLOR;
+
+#ifdef MSAA_OPTIMIZATION
+#ifdef GBUFFER_OPTIMIZATION
+float4 _main ( p_TL I, float4 pos2d, uint iSample );
+#else
+float4 _main ( p_TL I, uint iSample );
+#endif
+#else
+#ifdef GBUFFER_OPTIMIZATION
+float4 _main ( p_TL I, float4 pos2d );
+#else
+float4 _main ( p_TL I );
+#endif
+#endif
+
+void main()
+{
+	p_TL I;
+	I.Tex0 = p_TL_Tex0;
+	I.Color = p_TL_Color;
+
+	// Only one _main overload is declared above; call exactly the one matching the active defines.
+#ifdef MSAA_OPTIMIZATION
+#ifdef GBUFFER_OPTIMIZATION
+	SV_Target = _main ( I, gl_FragCoord, gl_SampleID );
+#else
+	SV_Target = _main ( I, gl_SampleID );
+#endif
+#else
+#ifdef GBUFFER_OPTIMIZATION
+	SV_Target = _main ( I, gl_FragCoord );
+#else
+	SV_Target = _main ( I );
+#endif
+#endif
+}
diff --git a/res/gamedata/shaders/gl/iostructs/p_aa_AA_sun.h b/res/gamedata/shaders/gl/iostructs/p_aa_AA_sun.h
new file mode 100644
index 00000000000..b03dd7f46f5
--- /dev/null
+++ b/res/gamedata/shaders/gl/iostructs/p_aa_AA_sun.h
@@ -0,0 +1,55 @@
+
+out vec4 SV_Target;
+#ifdef GBUFFER_OPTIMIZATION
+in vec4 gl_FragCoord;
+#endif
+#ifdef MSAA_OPTIMIZATION
+in int gl_SampleID;
+#endif
+
+layout(location = TEXCOORD0) in float2 p_aa_AA_sun_tc ; // TEXCOORD0;
+layout(location = TEXCOORD1) in float2 p_aa_AA_sun_unused ; // TEXCOORD1;
+layout(location = TEXCOORD2) in float2 p_aa_AA_sun_LT ; // TEXCOORD2;
+layout(location = TEXCOORD3) in float2 p_aa_AA_sun_RT ; // TEXCOORD3;
+layout(location = TEXCOORD4) in float2 p_aa_AA_sun_LB ; // TEXCOORD4;
+layout(location = TEXCOORD5) in float2 p_aa_AA_sun_RB ; // TEXCOORD5;
+
+#ifdef MSAA_OPTIMIZATION
+#ifdef GBUFFER_OPTIMIZATION
+float4 _main ( p_aa_AA_sun I, float4 pos2d, uint iSample );
+#else
+float4 _main ( p_aa_AA_sun I, uint iSample );
+#endif
+#else
+#ifdef GBUFFER_OPTIMIZATION
+float4 _main ( p_aa_AA_sun I, float4 pos2d );
+#else
+float4 _main ( p_aa_AA_sun I );
+#endif
+#endif
+
+// Entry point: repacks the stage inputs into p_aa_AA_sun and writes the matching _main overload's result to SV_Target.
+void main()
+{
+	p_aa_AA_sun I;
+	I.tc = p_aa_AA_sun_tc;
+	I.unused = p_aa_AA_sun_unused;
+	I.LT = p_aa_AA_sun_LT;
+	I.RT = p_aa_AA_sun_RT;
+	I.LB = p_aa_AA_sun_LB;
+	I.RB = p_aa_AA_sun_RB;
+
+#ifdef MSAA_OPTIMIZATION
+#ifdef GBUFFER_OPTIMIZATION
+	SV_Target = _main ( I, gl_FragCoord, gl_SampleID );
+#else
+	SV_Target = _main ( I, gl_SampleID );
+#endif
+#else
+#ifdef GBUFFER_OPTIMIZATION
+	SV_Target = _main ( I, gl_FragCoord );
+#else
+	SV_Target = _main ( I );
+#endif
+#endif
+}
diff --git a/res/gamedata/shaders/gl/iostructs/p_volume.h b/res/gamedata/shaders/gl/iostructs/p_volume.h
new file mode 100644
index 00000000000..8859f49bd8f
--- /dev/null
+++ b/res/gamedata/shaders/gl/iostructs/p_volume.h
@@ -0,0 +1,33 @@
+
+out vec4 SV_Target;
+in vec4 gl_FragCoord;
+#ifdef MSAA_OPTIMIZATION
+in int gl_SampleID;
+#endif
+
+layout(location = TEXCOORD0) in float4 v2p_volume_tc ; // TEXCOORD0;
+#ifdef USE_SJITTER
+layout(location = TEXCOORD1) in float4 v2p_volume_tcJ ; // TEXCOORD1;
+#endif
+
+#ifdef MSAA_OPTIMIZATION
+float4 _main ( v2p_volume I, uint iSample );
+#else
+float4 _main ( v2p_volume I );
+#endif
+
+// Entry point: repacks tc (and tcJ under USE_SJITTER) into v2p_volume and writes _main's result to SV_Target.
+void main()
+{
+	v2p_volume I;
+	I.tc = v2p_volume_tc;
+#ifdef USE_SJITTER
+	I.tcJ = v2p_volume_tcJ;
+#endif
+
+#ifdef MSAA_OPTIMIZATION
+	SV_Target = _main ( I, gl_SampleID );
+#else
+	SV_Target = _main ( I );
+#endif
+}
diff --git a/res/gamedata/shaders/gl/iostructs/v_volume.h b/res/gamedata/shaders/gl/iostructs/v_volume.h
new file mode 100644
index 00000000000..a7cc447e2fd
--- /dev/null
+++ b/res/gamedata/shaders/gl/iostructs/v_volume.h
@@ -0,0 +1,22 @@
+
+out gl_PerVertex { vec4 gl_Position; };
+
+layout(location = POSITION) in float4 v_volume_P;
+
+layout(location = TEXCOORD0) out float4 v2p_volume_tc ; // TEXCOORD0;
+#ifdef USE_SJITTER
+layout(location = TEXCOORD1) out float4 v2p_volume_tcJ ; // TEXCOORD1;
+#endif
+
+v2p_volume _main ( float4 P );
+
+// Entry point: runs _main on the vertex position and forwards every field of its result to the stage outputs.
+void main()
+{
+	v2p_volume O = _main ( v_volume_P );
+	v2p_volume_tc = O.tc;
+#ifdef USE_SJITTER
+	v2p_volume_tcJ = O.tcJ;
+#endif
+	gl_Position = O.hpos; // _main fills hpos (m_WVP * P in accum_sun.vs); without this the clip-space position is undefined
+}
diff --git a/res/gamedata/shaders/gl/lmodel.h
b/res/gamedata/shaders/gl/lmodel.h
new file mode 100644
index 00000000000..645121ace35
--- /dev/null
+++ b/res/gamedata/shaders/gl/lmodel.h
@@ -0,0 +1,71 @@
+#ifndef LMODEL_H
+#define LMODEL_H
+
+#include "common.h"
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Lighting formulas //
+// plight_infinity: response for a light described only by its direction. Returns the s_material
+// sample at ( dot(L,N), dot(H,N), m ); pnt is negated and normalized to obtain the eye vector.
+float4 plight_infinity( float m, float3 pnt, float3 normal, float3 light_direction )
+{
+	float3 N = normal; // normal
+	float3 V = -normalize (pnt); // vector2eye
+	float3 L = -light_direction; // vector2light
+	float3 H = normalize (L+V); // half-angle vector
+	return tex3D (s_material, float3( dot(L,N), dot(H,N), m ) ); // sample material
+}
+/*
+float plight_infinity2( float m, float3 pnt, float3 normal, float3 light_direction )
+{
+	float3 N = normal; // normal
+	float3 V = -normalize (pnt); // vector2eye
+	float3 L = -light_direction; // vector2light
+	float3 H = normalize (L+V); // half-angle vector
+	float3 R = reflect (-V,N);
+	float s = saturate(dot(L,R));
+	s = saturate(dot(H,N));
+	float f = saturate(dot(-V,R));
+	s *= f;
+	float4 r = tex3D (s_material, float3( dot(L,N), s, m ) ); // sample material
+	r.w = pow(saturate(s),4);
+	return r ;
+}
+*/
+
+// plight_local: response for a light located at light_position. Samples s_material the same way
+// as plight_infinity, then scales the sample by saturate( 1 - rsqr*light_range_rsq ).
+// rsqr (out) receives the squared light-to-point distance.
+float4 plight_local( float m, float3 pnt, float3 normal, float3 light_position, float light_range_rsq, out float rsqr )
+{
+	float3 N = normal; // normal
+	float3 L2P = pnt-light_position; // light2point
+	float3 V = -normalize (pnt); // vector2eye
+	float3 L = -normalize (float3(L2P)); // vector2light
+	float3 H = normalize (L+V); // half-angle vector
+	rsqr = dot (L2P,L2P); // distance 2 light (squared)
+	float att = saturate (1 - rsqr*light_range_rsq); // q-linear attenuate
+	float4 light = tex3D (s_material, float3( dot(L,N), dot(H,N), m ) ); // sample material
+	return att*light;
+}
+
+// TODO: DX10: Remove path without blending
+// blendp: returns value unchanged; the accumulator read-back that emulated blending is commented out, so tcp is unused.
+float4 blendp( float4 value, float4 tcp)
+{
+// #ifndef FP16_BLEND
+// value += (float4)tex2Dproj (s_accumulator, tcp); // emulate blend
+// #endif
+	return value;
+}
+
+// blend: returns value unchanged; the accumulator read-back that emulated blending is commented out, so tc is unused.
+float4 blend( float4 value, float2 tc)
+{
+// #ifndef FP16_BLEND
+// value += (float4)tex2D (s_accumulator, tc); // emulate blend
+// #endif
+	return value;
+}
+
+#endif
\ No newline at end of file
diff --git a/res/gamedata/shaders/gl/shadow.h b/res/gamedata/shaders/gl/shadow.h
new file mode 100644
index 00000000000..26e7d8d4d33
--- /dev/null
+++ b/res/gamedata/shaders/gl/shadow.h
@@ -0,0 +1,836 @@
+#ifndef SHADOW_H
+#define SHADOW_H
+
+#include "common.h"
+
+//uniform sampler s_smap : register(ps,s0); // 2D/cube shadowmap
+//Texture2D s_smap; // 2D/cube shadowmap
+// Used for RGBA texture too ?!
+Texture2D s_dmap; // 2D/cube depthmap; read with textureLod/textureGather below wherever raw depth values are needed
+Texture2DShadow s_smap; // 2D/cube shadowmap; sampler2DShadow, so lookups take a reference depth
+
+Texture2D s_smap_minmax; // 2D/cube shadowmap
+#include "gather.ps"
+
+//SamplerComparisonState smp_smap; // Special compare sampler
+//sampler smp_jitter;
+
+Texture2D jitter0;
+Texture2D jitter1;
+//uniform sampler2D jitter2;
+//uniform sampler2D jitter3;
+//uniform float4 jitterS;
+
+Texture2D jitterMipped;
+
+#ifndef USE_ULTRA_SHADOWS
+#define KERNEL 0.6f
+#else
+#define KERNEL 1.0f
+#endif
+
+float modify_light( float light )
+{
+	return ( light > 0.7 ?
1.0 : lerp( 0.0, 1.0, saturate( light / 0.7 ) ) ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// hardware + PCF +////////////////////////////////////////////////////////////////////////////////////////// +float sample_hw_pcf (float4 tc,float4 shift) +{ + const float ts = KERNEL / float(SMAP_size); + return tex2Dproj( s_smap, tc + tc.w * shift * ts ); +} + +#define GS2 3 + +float shadow_hw( float4 tc ) +{ + float s0 = sample_hw_pcf( tc, float4( -1, -1, 0, 0) ); + float s1 = sample_hw_pcf( tc, float4( +1, -1, 0, 0) ); + float s2 = sample_hw_pcf( tc, float4( -1, +1, 0, 0) ); + float s3 = sample_hw_pcf( tc, float4( +1, +1, 0, 0) ); + + return (s0+s1+s2+s3)/4.f; +} + +#if SUN_QUALITY>=4 +#define FILTER_SIZE 11 +#define FS FILTER_SIZE +#define FS2 ( FILTER_SIZE / 2 ) + +const float W2[11][11] = + { { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, + { 0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0 }, + { 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 }, + { 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 }, + { 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 }, + { 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 }, + { 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 }, + { 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 }, + { 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 }, + { 0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0 }, + { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, + }; + +const float W1[11][11] = + { { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, + { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, + { 0.0,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0,0.0 }, + { 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 }, + { 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 }, + { 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 }, + { 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 }, + { 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 }, + { 0.0,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0,0.0 }, + { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, + { 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	};
+
+const float W0[11][11] =
+	{ { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.1,0.1,0.1,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.1,1.0,0.1,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.1,0.1,0.1,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
+	};
+
+float Fw( int r, int c, float fL ) // weight at (r,c): cubic Bernstein blend over fL of W0, W1, W2 and an all-ones kernel
+{
+	return (1.0-fL) * (1.0-fL) * (1.0-fL) * W0[r][c] + 3.0f * (1.0-fL) * (1.0-fL) * fL * W1[r][c] + 3.0f * fL * fL * (1.0-fL) * W2[r][c] + fL * fL * fL * 1.0f;
+}
+
+#define BLOCKER_FILTER_SIZE 11
+#define BFS BLOCKER_FILTER_SIZE
+#define BFS2 ( BLOCKER_FILTER_SIZE / 2 )
+
+#define SUN_WIDTH 300.0f
+
+// uses gather for DX11/10.1 and visibility encoding for DX10.0; tc is the already-projected shadow-map coordinate (xy = uv, z = depth)
+float shadow_extreme_quality( float3 tc )
+{
+	float s = 0.0f;
+	float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
+	float2 tcs = floor( stc );
+	float2 fc;
+	int row;
+	int col;
+	float w = 0.0;
+	float avgBlockerDepth = 0;
+	float blockerCount = 0;
+	float fRatio;
+	float4 v1[ FS2 + 1 ];
+	float2 v0[ FS2 + 1 ];
+	float2 off;
+
+	fc = stc - tcs; // sub-texel fraction
+	tc.xy = tc.xy - ( (1.0f/SMAP_size) * fc ); // step uv back by that fraction
+	tc.z -= 0.0001f; // constant depth bias
+
+#if defined(SM_4_1) || defined( SM_5)
+	// find number of blockers and sum up blocker depth
+	for( row = -BFS2; row <= BFS2; row += 2 )
+	{
+		for( col = -BFS2; col <= BFS2; col += 2 )
+		{
+			float4 d4 = textureGatherOffset( s_dmap, tc.xy, int2( col, row ) ); // raw depths come from s_dmap: s_smap is a sampler2DShadow, whose gather needs a reference depth and returns comparison results
+			float4 b4 = ( tc.zzzz <= d4 ) ?
(0.0f).xxxx : (1.0f).xxxx; + + blockerCount += dot( b4, (1.0f).xxxx ); + avgBlockerDepth += dot( d4, b4 ); + } + } +#else // SM_4_0 + uint vmask[ FS + 1 ]; + + for( col = 0; col <= FS; ++col ) + vmask[ col ] = uint(0); + + for( row = -FS2; row <= FS2; row +=2 ) + { + for( int col = -FS2; col <= FS2; col +=2 ) + { + float4 d4; + float b; + + d4.w = textureLodOffset (s_dmap, tc.xy, 0, int2( col, row ) ).x; + b = ( tc.z <= d4.w ) ? (0.0f) : (1.0f); + vmask[ col + FS2 + 0 ] += ( ( tc.z <= d4.w ) ? ( uint(1) << uint( row + FS2 + 0 ) ) : uint(0) ); + blockerCount += b; + avgBlockerDepth += d4.w * b; + + d4.z = textureLodOffset (s_dmap, tc.xy, 0, int2( col+1, row ) ).x; + b = ( tc.z <= d4.z ) ? (0.0f) : (1.0f); + vmask[ col + FS2 + 1 ] += ( ( tc.z <= d4.z ) ? ( uint(1) << uint( row + FS2 + 0 ) ) : uint(0) ); + blockerCount += b; + avgBlockerDepth += d4.z * b; + + d4.x = textureLodOffset (s_dmap, tc.xy, 0, int2( col, row+1 ) ).x; + vmask[ col + FS2 + 0 ] += ( ( tc.z <= d4.x ) ? ( uint(1) << uint( row + FS2 + 1 ) ) : uint(0) ); + b = ( tc.z <= d4.x ) ? (0.0f) : (1.0f); + blockerCount += b; + avgBlockerDepth += d4.x * b; + + d4.y = textureLodOffset (s_dmap, tc.xy, 0, int2( col+1, row+1 ) ).x; + vmask[ col + FS2 + 1 ] += ( ( tc.z <= d4.y ) ? ( uint(1) << uint( row + FS2 + 1 ) ) : uint(0) ); + b = ( tc.z <= d4.y ) ? (0.0f) : (1.0f); + blockerCount += b; + avgBlockerDepth += d4.y * b; + } + } +#endif + + // compute ratio average blocker depth vs. 
pixel depth + if( blockerCount > 0.0 ) + { + avgBlockerDepth /= blockerCount; + fRatio = saturate( ( ( tc.z - avgBlockerDepth ) * SUN_WIDTH ) / avgBlockerDepth ); + fRatio *= fRatio; + } + else + { + fRatio = 0.0; + } + + for( row = 0; row < FS; ++row ) + { + for( col = 0; col < FS; ++col ) + w += Fw(row,col,fRatio); + } + + // filter shadow map samples using the dynamic weights + for( row = -FS2; row <= FS2; row += 2 ) + { + for( int col = -FS2; col <= FS2; col += 2 ) + { +#if ( defined(SM_5) ) || ( defined(SM_4_1) ) + v1[(col+FS2)/2] = textureGatherOffset (s_smap, tc.xy, tc.z, + int2( col, row ) ); +#else + v1[(col+FS2)/2].w = ( ( vmask[ col + FS2 + 0 ] & ( uint(1) << uint( row + FS2 + 0 ) ) ) ? 1.0f : 0.0f ); + v1[(col+FS2)/2].z = ( ( vmask[ col + FS2 + 1 ] & ( uint(1) << uint( row + FS2 + 0 ) ) ) ? 1.0f : 0.0f ); + v1[(col+FS2)/2].x = ( ( vmask[ col + FS2 + 0 ] & ( uint(1) << uint( row + FS2 + 1 ) ) ) ? 1.0f : 0.0f ); + v1[(col+FS2)/2].y = ( ( vmask[ col + FS2 + 1 ] & ( uint(1) << uint( row + FS2 + 1 ) ) ) ? 
1.0f : 0.0f ); +#endif + if( col == -FS2 ) + { + s += ( 1 - fc.y ) * ( v1[0].w * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,0,fRatio) * fc.x ) + v1[0].z * ( fc.x * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,1,fRatio) ) + Fw(row+FS2,1,fRatio) ) ); + s += ( fc.y ) * ( v1[0].x * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,0,fRatio) * fc.x ) + v1[0].y * ( fc.x * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,1,fRatio) ) + Fw(row+FS2,1,fRatio) ) ); + if( row > -FS2 ) + { + s += ( 1 - fc.y ) * ( v0[0].x * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,0,fRatio) * fc.x ) + v0[0].y * ( fc.x * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,1,fRatio) ) + Fw(row+FS2-1,1,fRatio) ) ); + s += ( fc.y ) * ( v1[0].w * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,0,fRatio) * fc.x ) + v1[0].z * ( fc.x * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,1,fRatio) ) + Fw(row+FS2-1,1,fRatio) ) ); + } + } + else if( col == FS2 ) + { + s += ( 1 - fc.y ) * ( v1[FS2].w * ( fc.x * ( Fw(row+FS2,FS-2,fRatio) - Fw(row+FS2,FS-1,fRatio) ) + Fw(row+FS2,FS-1,fRatio) ) + v1[FS2].z * fc.x * Fw(row+FS2,FS-1,fRatio) ); + s += ( fc.y ) * ( v1[FS2].x * ( fc.x * ( Fw(row+FS2,FS-2,fRatio) - Fw(row+FS2,FS-1,fRatio) ) + Fw(row+FS2,FS-1,fRatio) ) + v1[FS2].y * fc.x * Fw(row+FS2,FS-1,fRatio) ); + if( row > -FS2 ) + { + s += ( 1 - fc.y ) * ( v0[FS2].x * ( fc.x * ( Fw(row+FS2-1,FS-2,fRatio) - Fw(row+FS2-1,FS-1,fRatio) ) + Fw(row+FS2-1,FS-1,fRatio) ) + v0[FS2].y * fc.x * Fw(row+FS2-1,FS-1,fRatio) ); + s += ( fc.y ) * ( v1[FS2].w * ( fc.x * ( Fw(row+FS2-1,FS-2,fRatio) - Fw(row+FS2-1,FS-1,fRatio) ) + Fw(row+FS2-1,FS-1,fRatio) ) + v1[FS2].z * fc.x * Fw(row+FS2-1,FS-1,fRatio) ); + } + } + else + { + s += ( 1 - fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * ( Fw(row+FS2,col+FS2-1,fRatio) - Fw(row+FS2,col+FS2+0,fRatio) ) + Fw(row+FS2,col+FS2+0,fRatio) ) + + v1[(col+FS2)/2].z * ( fc.x * ( Fw(row+FS2,col+FS2-0,fRatio) - Fw(row+FS2,col+FS2+1,fRatio) ) + Fw(row+FS2,col+FS2+1,fRatio) ) ); + s += ( fc.y ) * ( v1[(col+FS2)/2].x * ( fc.x * ( Fw(row+FS2,col+FS2-1,fRatio) - 
Fw(row+FS2,col+FS2+0,fRatio) ) + Fw(row+FS2,col+FS2+0,fRatio) ) + + v1[(col+FS2)/2].y * ( fc.x * ( Fw(row+FS2,col+FS2-0,fRatio) - Fw(row+FS2,col+FS2+1,fRatio) ) + Fw(row+FS2,col+FS2+1,fRatio) ) ); + if( row > -FS2 ) + { + s += ( 1 - fc.y ) * ( v0[(col+FS2)/2].x * ( fc.x * ( Fw(row+FS2-1,col+FS2-1,fRatio) - Fw(row+FS2-1,col+FS2+0,fRatio) ) + Fw(row+FS2-1,col+FS2+0,fRatio) ) + + v0[(col+FS2)/2].y * ( fc.x * ( Fw(row+FS2-1,col+FS2-0,fRatio) - Fw(row+FS2-1,col+FS2+1,fRatio) ) + Fw(row+FS2-1,col+FS2+1,fRatio) ) ); + s += ( fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * ( Fw(row+FS2-1,col+FS2-1,fRatio) - Fw(row+FS2-1,col+FS2+0,fRatio) ) + Fw(row+FS2-1,col+FS2+0,fRatio) ) + + v1[(col+FS2)/2].z * ( fc.x * ( Fw(row+FS2-1,col+FS2-0,fRatio) - Fw(row+FS2-1,col+FS2+1,fRatio) ) + Fw(row+FS2-1,col+FS2+1,fRatio) ) ); + } + } + if( row != FS2 ) + v0[(col+FS2)/2] = v1[(col+FS2)/2].xy; + } + } + + return s/w; +} + +float4 Fw( int r, int c ) +{ + return float4( W0[r][c], W1[r][c], W2[r][c], 1.0f ); +} + +//====================================================================================== +// This shader computes the contact hardening shadow filter +//====================================================================================== +float shadow_extreme_quality_fused( float3 tc ) +{ + float4 s = (0.0f).xxxx; + float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 ); + float2 tcs = floor( stc ); + float2 fc; + int row; + int col; + float w = 0.0; + float avgBlockerDepth = 0; + float blockerCount = 0; + float fRatio; + float4 v1[ FS2 + 1 ]; + float2 v0[ FS2 + 1 ]; + float2 off; + + fc = stc - tcs; + tc.xy = tc.xy - ( fc * (1.0f/SMAP_size) ); + + // filter shadow map samples using the dynamic weights + for( row = -FS2; row <= FS2; row += 2 ) + { + for( col = -FS2; col <= FS2; col += 2 ) + { + float4 d4; + +#ifndef PS_4 + d4 = textureGather( s_dmap, tc.xy + (1.0f/SMAP_size) * float2( col, row ) ); +#else + d4.w = textureLod( s_dmap, tc.xy + (1.0f/SMAP_size) * float2( col, row ), 0 ).x; + 
d4.z = textureLod( s_dmap, tc.xy + (1.0f/SMAP_size) * float2( col+1, row ) , 0 ).x; + d4.y = textureLod( s_dmap, tc.xy + (1.0f/SMAP_size) * float2( col+1, row+1 ), 0 ).x; + d4.x = textureLod( s_dmap, tc.xy + (1.0f/SMAP_size) * float2( col, row+1 ), 0 ).x; +#endif + float4 b4 = ( tc.zzzz <= d4 ) ? (0.0f).xxxx : (1.0f).xxxx; + + v1[(col+FS2)/2] = ( tc.zzzz <= d4 ) ? (1.0f).xxxx : (0.0f).xxxx; + blockerCount += dot( b4, (1.0).xxxx ); + avgBlockerDepth += dot( d4, b4 ); + + if( col == -FS2 ) + { + s += ( 1 - fc.y ) * ( v1[0].w * ( Fw(row+FS2,0) - + Fw(row+FS2,0) * fc.x ) + v1[0].z * + ( fc.x * ( Fw(row+FS2,0) - + Fw(row+FS2,1) ) + + Fw(row+FS2,1) ) ); + s += ( fc.y ) * ( v1[0].x * ( Fw(row+FS2,0) - + Fw(row+FS2,0) * fc.x ) + + v1[0].y * ( fc.x * ( Fw(row+FS2,0) - + Fw(row+FS2,1) ) + + Fw(row+FS2,1) ) ); + if( row > -FS2 ) + { + s += ( 1 - fc.y ) * ( v0[0].x * ( Fw(row+FS2-1,0) - + Fw(row+FS2-1,0) * fc.x ) + v0[0].y * + ( fc.x * ( Fw(row+FS2-1,0) - + Fw(row+FS2-1,1) ) + + Fw(row+FS2-1,1) ) ); + s += ( fc.y ) * ( v1[0].w * ( Fw(row+FS2-1,0) - + Fw(row+FS2-1,0) * fc.x ) + v1[0].z * + ( fc.x * ( Fw(row+FS2-1,0) - + Fw(row+FS2-1,1) ) + + Fw(row+FS2-1,1) ) ); + } + } + else if( col == FS2 ) + { + s += ( 1 - fc.y ) * ( v1[FS2].w * ( fc.x * ( Fw(row+FS2,FS-2) - + Fw(row+FS2,FS-1) ) + + Fw(row+FS2,FS-1) ) + v1[FS2].z * fc.x * + Fw(row+FS2,FS-1) ); + s += ( fc.y ) * ( v1[FS2].x * ( fc.x * ( Fw(row+FS2,FS-2) - + Fw(row+FS2,FS-1) ) + + Fw(row+FS2,FS-1) ) + v1[FS2].y * fc.x * + Fw(row+FS2,FS-1) ); + if( row > -FS2 ) + { + s += ( 1 - fc.y ) * ( v0[FS2].x * ( fc.x * + ( Fw(row+FS2-1,FS-2) - + Fw(row+FS2-1,FS-1) ) + + Fw(row+FS2-1,FS-1) ) + + v0[FS2].y * fc.x * Fw(row+FS2-1,FS-1) ); + s += ( fc.y ) * ( v1[FS2].w * ( fc.x * + ( Fw(row+FS2-1,FS-2) - + Fw(row+FS2-1,FS-1) ) + + Fw(row+FS2-1,FS-1) ) + + v1[FS2].z * fc.x * Fw(row+FS2-1,FS-1) ); + } + } + else + { + s += ( 1 - fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * + ( Fw(row+FS2,col+FS2-1) - + Fw(row+FS2,col+FS2+0) ) + + 
Fw(row+FS2,col+FS2+0) ) + + v1[(col+FS2)/2].z * ( fc.x * + ( Fw(row+FS2,col+FS2-0) - + Fw(row+FS2,col+FS2+1) ) + + Fw(row+FS2,col+FS2+1) ) ); + s += ( fc.y ) * ( v1[(col+FS2)/2].x * ( fc.x * + ( Fw(row+FS2,col+FS2-1) - + Fw(row+FS2,col+FS2+0) ) + + Fw(row+FS2,col+FS2+0) ) + + v1[(col+FS2)/2].y * ( fc.x * + ( Fw(row+FS2,col+FS2-0) - + Fw(row+FS2,col+FS2+1) ) + + Fw(row+FS2,col+FS2+1) ) ); + if( row > -FS2 ) + { + s += ( 1 - fc.y ) * ( v0[(col+FS2)/2].x * ( fc.x * + ( Fw(row+FS2-1,col+FS2-1) - + Fw(row+FS2-1,col+FS2+0) ) + + Fw(row+FS2-1,col+FS2+0) ) + + v0[(col+FS2)/2].y * ( fc.x * + ( Fw(row+FS2-1,col+FS2-0) - + Fw(row+FS2-1,col+FS2+1) ) + + Fw(row+FS2-1,col+FS2+1) ) ); + s += ( fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * + ( Fw(row+FS2-1,col+FS2-1) - + Fw(row+FS2-1,col+FS2+0) ) + + Fw(row+FS2-1,col+FS2+0) ) + + v1[(col+FS2)/2].z * ( fc.x * + ( Fw(row+FS2-1,col+FS2-0) - + Fw(row+FS2-1,col+FS2+1) ) + + Fw(row+FS2-1,col+FS2+1) ) ); + } + } + + if( row != FS2 ) + { + v0[(col+FS2)/2] = v1[(col+FS2)/2].xy; + } + } + } + + // compute ratio using formulas from PCSS + if( blockerCount > 0.0 ) + { + avgBlockerDepth /= blockerCount; + fRatio = saturate( ( ( tc.z - avgBlockerDepth ) * SUN_WIDTH ) / avgBlockerDepth ); + fRatio *= fRatio; + } + else + { + fRatio = 0.0; + } + + // sum up weights of dynamic filter matrix + for( row = 0; row < FS; ++row ) + { + for( col = 0; col < FS; ++col ) + { + w += Fw(row,col,fRatio); + } + } + + return dot(s, float4((1.0f-fRatio)*(1.0f-fRatio)*(1.0f-fRatio), + 3.0f * (1.0-fRatio)*(1.0-fRatio)*fRatio, + 3.0f * fRatio*fRatio*(1.0-fRatio), + fRatio*fRatio*fRatio ) )/w; +} +#endif + +#ifdef SM_4_1 + +float dx10_1_hw_hq_7x7( float3 tc ) +{ + float s = 0.0f; + float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 ); + float2 tcs = floor( stc ); + float2 fc; + int row; + int col; + + fc.xy = stc - tcs; + tc.xy = tcs * ( 1.0 / SMAP_size ); + + // loop over the rows + for( row = -GS2; row <= GS2; row += 2 ) + { + [unroll]for( col = -GS2; col <= GS2; col 
+= 2 ) + { + float4 v = ( tc.zzzz <= textureGatherOffset( s_dmap, tc.xy, int2( col, row ) ) ) ? (1.0).xxxx : (0.0).xxxx; + + if( row == -GS2 ) // top row + { + if( col == -GS2 ) // left + s += dot( float4( 1.0-fc.x, 1.0, 1.0-fc.y, (1.0-fc.x)*(1.0-fc.y) ), v ); + else if( col == GS2 ) // right + s += dot( float4( 1.0f, fc.x, fc.x*(1.0-fc.y), 1.0-fc.y ), v ); + else // center + s += dot( float4( 1.0, 1.0, 1.0-fc.y, 1.0-fc.y ), v ); + } + else if( row == GS2 ) // bottom row + { + if( col == -GS2 ) // left + s += dot( float4( (1.0-fc.x)*fc.y, fc.y, 1.0, (1.0-fc.x) ), v ); + else if( col == GS2 ) // right + s += dot( float4( fc.y, fc.x*fc.y, fc.x, 1.0 ), v ); + else // center + s += dot( float4(fc.yy,1.0,1.0), v ); + } + else // center rows + { + if( col == -GS2 ) // left + s += dot( float4( (1.0-fc.x), 1.0, 1.0, (1.0-fc.x) ), v ); + else if( col == GS2 ) // right + s += dot( float4( 1.0, fc.x, fc.x, 1.0 ), v ); + else // center + s += dot( (1.0).xxxx, v ); + } + } + } + + return s*(1.0/49.0); +} + +#endif + +float dx10_0_hw_hq_7x7( float4 tc ) +{ + tc.xyz /= tc.w; + + float s = 0.0; + float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 ); + float2 tcs = floor( stc ); + float2 fc; + + fc = stc - tcs; + tc.xy = tc.xy - ( fc * ( 1.0/SMAP_size ) ); + + float2 pwAB = ( float2( 2.0 ) - fc ); + float2 tcAB = float2( 1.0/SMAP_size ) / pwAB; + float2 tcM = float2(0.5/SMAP_size ); + float2 pwGH = ( float2( 1.0 ) + fc ); + float2 tcGH = (1.0/SMAP_size) * ( fc / pwGH ); + + for( int row = -GS2; row <= GS2; row += 2 ) + { + for( int col = -GS2; col <= GS2; col += 2 ) + { + if( row == -GS2 ) // top row + { + if( col == -GS2 ) // left + s += ( pwAB.x * pwAB.y ) * textureOffset( s_smap, float3(tc.xy + tcAB, tc.z), int2( col, row ) ); + else if( col == GS2 ) // right + s += ( pwGH.x * pwAB.y ) * textureOffset( s_smap, float3(tc.xy + float2( tcGH.x, tcAB.y), tc.z), int2( col, row ) ); + else // center + s += ( 2.0 * pwAB.y ) * textureOffset( s_smap, float3(tc.xy + float2( tcM.x, 
tcAB.y), tc.z), int2( col, row ) ); + } + else if( row == GS2 ) // bottom row + { + if( col == -GS2 ) // left + s += ( pwAB.x * pwGH.y ) * textureOffset( s_smap, float3(tc.xy + float2( tcAB.x, tcGH.y ), tc.z), int2( col, row ) ); + else if( col == GS2 ) // right + s += ( pwGH.x * pwGH.y ) * textureOffset( s_smap, float3(tc.xy + tcGH, tc.z), int2( col, row ) ); + else // center + s += ( 2.0 * pwGH.y ) * textureOffset( s_smap, float3(tc.xy + float2( tcM.x, tcGH.y ), tc.z), int2( col, row ) ); + } + else // center rows + { + if( col == -GS2 ) // left + s += ( pwAB.x * 2.0 ) * textureOffset( s_smap, float3(tc.xy + float2( tcAB.x, tcM.y ), tc.z), int2( col, row ) ); + else if( col == GS2 ) // right + s += ( pwGH.x * 2.0 ) * textureOffset( s_smap, float3(tc.xy + float2( tcGH.x, tcM.y), tc.z), int2( col, row ) ); + else // center + s += ( 2.0 * 2.0 ) * textureOffset( s_smap, float3(tc.xy + tcM, tc.z), int2( col, row ) ); + } + } + } + + return s/49.0; +} + +#ifdef SM_MINMAX +bool cheap_reject( float3 tc, inout bool full_light ) +{ + float4 plane0 = sm_minmax_gather( tc.xy, int2( -1,-1 ) ); + float4 plane1 = sm_minmax_gather( tc.xy, int2( 1,-1 ) ); + float4 plane2 = sm_minmax_gather( tc.xy, int2( -1, 1 ) ); + float4 plane3 = sm_minmax_gather( tc.xy, int2( 1, 1 ) ); + bool plane = all( greaterThanEqual( plane0, float4(0) ) && greaterThanEqual( plane1, float4(0) ) && greaterThanEqual( plane2, float4(0) ) && greaterThanEqual( plane3, float4(0) ) ); + + if( !plane ) // if there are no proper plane equations in the support region + { + bool no_plane = all( lessThan( plane0, float4(0) ) && lessThan( plane1, float4(0) ) && lessThan( plane2, float4(0) ) && lessThan( plane3, float4(0) ) ); + float4 z = float4( tc.z - 0.0005 ); + bool reject = all( greaterThan( z, -plane0 ) && greaterThan( z, -plane1 ) && greaterThan( z, -plane2 ) && greaterThan( z, -plane3 ) ); + if( no_plane && reject ) + { + full_light = false; + return true; + } + else + { + return false; + } + } + else // plane 
equation detected + { + // compute corrected z for texel pos + const float scale = float( SMAP_size / 4 ); + float2 fc = frac( tc.xy * scale ); + float z = lerp( lerp( plane0.y, plane1.x, fc.x ), lerp( plane2.z, plane3.w, fc.x ), fc.y ); + + // do minmax test with new z + full_light = ( ( tc.z - 0.0001 ) <= z ); + + return true; + } +} + +#endif // SM_MINMAX + +float shadow_hw_hq( float4 tc ) +{ +#ifdef SM_MINMAX + bool full_light = false; + bool cheap_path = cheap_reject( tc.xyz / tc.w, full_light ); + + if( cheap_path ) + { + if( full_light == true ) + return 1.0; + else + return sample_hw_pcf( tc, float4(0) ); + } + else + { +#if SUN_QUALITY>=4 // extreme quality + return shadow_extreme_quality( tc.xyz / tc.w ); +#else // SUN_QUALITY<4 +#ifdef SM_4_1 + return dx10_1_hw_hq_7x7( tc.xyz / tc.w ); +#else // SM_4_1 + return dx10_0_hw_hq_7x7( tc ); +#endif // SM_4_1 +#endif //SUN_QUALITY==4 + } +#else // SM_MINMAX +#if SUN_QUALITY>=4 // extreme quality + return shadow_extreme_quality( tc.xyz / tc.w ); +#else // SUN_QUALITY<4 +#ifdef SM_4_1 + return dx10_1_hw_hq_7x7( tc.xyz / tc.w ); +#else // SM_4_1 + return dx10_0_hw_hq_7x7( tc ); +#endif // SM_4_1 +#endif //SUN_QUALITY==4 +#endif // SM_MINMAX +} + +////////////////////////////////////////////////////////////////////////////////////////// +// D24X8+PCF +////////////////////////////////////////////////////////////////////////////////////////// + +float4 test (float4 tc, float2 offset) +{ + + float4 tcx = float4 (tc.xy + tc.w*offset, tc.zw); + return float4(tex2Dproj (s_smap,tcx)); +} + +/*half shadowtest_sun (float4 tc, float4 tcJ) // jittered sampling +{ + half4 r; + + const float scale = (0.5f/float(SMAP_size)); + + float texsize = 2*SMAP_size; + float2 tc_J = tc.xy/tc.w*texsize/8.0f; + float2 fr = frac(tc_J)*.5f; + +// half4 J0 = tex2D (jitter0,fr)*scale; +// half4 J1 = tex2D (jitter1,fr)*scale*2; + float4 J0 = jitter0.Sample( smp_jitter, fr )*scale; +// float4 J1 = jitter1.Sample( smp_jitter, fr )*scale; + + float 
k = 0.99f/float(SMAP_size); + r.x = test (tc,J0.xy+float2(-k,-k)).x; + r.y = test (tc,J0.wz+float2( k,-k)).y; + + r.z = test (tc,J0.xy+float2(-k, k)).z; + r.w = test (tc,J0.wz+float2( k, k)).x; + + half4 f; + float k1 = 1.5f/float(SMAP_size); + f.x = test (tc,-J0.xy+float2(-k1,0)).x; + f.y = test (tc,-J0.wz+float2( 0,-k1)).y; + + f.z = test (tc,-J0.xy+float2( k1, 0)).z; + f.w = test (tc,-J0.wz+float2( 0, k1)).x; + + half res = ( r.x + r.y + r.z + r.w + f.x + f.y + f.z + f.w )*1.f/(4.f + 4.f ); + return res; +}*/ +half shadowtest_sun (float4 tc, float4 tcJ) // jittered sampling +{ + half4 r; + + // const float scale = (2.0f/float(SMAP_size)); + const float scale = (0.7f/float(SMAP_size)); + + + float2 tc_J = frac(tc.xy/tc.w*SMAP_size/4.0f )*.5f; + float4 J0 = tex2D (jitter0,tc_J)*scale; + //half4 J1 = tex2D (jitter1,tc_J)*scale; + + const float k = .5f/float(SMAP_size); + r.x = test (tc, J0.xy+half2(-k,-k)).x; + r.y = test (tc, J0.wz+half2( k,-k)).y; + r.z = test (tc,-J0.xy+half2(-k, k)).z; + r.w = test (tc,-J0.wz+half2( k, k)).x; + + return dot(r,float4(1.f/4.f)); +} + +half shadow_high (float4 tc) // jittered sampling +{ + + const float scale = (0.5f/float(SMAP_size)); + + float2 tc_J = frac(tc.xy/tc.w*SMAP_size/4.0f )*.5f; + float4 J0 = tex2D (jitter0,tc_J)*scale; + + const float k = 1.f/float(SMAP_size); + half4 r; + r.x = test (tc,J0.xy+half2(-k,-k)).x; + r.y = test (tc,J0.wz+half2( k,-k)).y; + + r.z = test (tc,J0.xy+half2(-k, k)).z; + r.w = test (tc,J0.wz+half2( k, k)).x; + + + const float k1 = 1.3f/float(SMAP_size); + half4 r1; + r1.x = test (tc,-J0.xy+half2(-k1,0)).x; + r1.y = test (tc,-J0.wz+half2( 0,-k1)).y; + + r1.z = test (tc,-2*J0.xy+half2( k1, 0)).z; + r1.w = test (tc,-2*J0.wz+half2( 0, k1)).x; + + return ( r.x + r.y + r.z + r.w + r1.x + r1.y + r1.z + r1.w )*1.f/8.f; +} + +float shadow( float4 tc ) +{ +#ifdef USE_ULTRA_SHADOWS +# ifdef SM_MINMAX + return modify_light( shadow_hw_hq( tc ) ); +# else + return shadow_hw_hq( tc ); +# endif +#else +# if 
SUN_QUALITY>=2 // Hight quality + //return shadowtest_sun ( tc, float4(0,0,0,0) ); // jittered sampling; + return shadow_hw (tc); +# else + return shadow_hw (tc); +# endif +#endif +} + +float shadow_volumetric( float4 tc ) +{ + return sample_hw_pcf (tc,float4(-1,-1,0,0)); +} + + +#ifdef SM_MINMAX + +////////////////////////////////////////////////////////////////////////////////////////// +// hardware + PCF +////////////////////////////////////////////////////////////////////////////////////////// + +float shadow_dx10_1( float4 tc, float2 tcJ, float2 pos2d ) +{ + return shadow( tc ); +} + +float shadow_dx10_1_sunshafts( float4 tc, float2 pos2d ) +{ + float3 t = tc.xyz / tc.w; + float minmax = textureLod( s_smap_minmax, t.xy, 0 ).x; + bool umbra = ( ( minmax < 0 ) && ( t.z > -minmax ) ); + + if( umbra ) + { + return 0.0; + } + else + { + return shadow_hw( tc ); + } +} + +#endif + + +////////////////////////////////////////////////////////////////////////////////////////// +// testbed + +//uniform sampler2D jitter0; +//uniform sampler2D jitter1; +float shadowtest (float4 tc, float4 tcJ) // jittered sampling +{ + float4 r; + + const float scale = (2.7f/float(SMAP_size)); + + float4 J0 = tex2Dproj (jitter0,tcJ)*scale; + float4 J1 = tex2Dproj (jitter1,tcJ)*scale; + + r.x = test (tc,J0.xy).x; + r.y = test (tc,J0.wz).y; + r.z = test (tc,J1.xy).z; + r.w = test (tc,J1.wz).x; + + return dot(r,float4(1.f/4.f)); +} + +float shadow_rain (float4 tc, float2 tcJ) // jittered sampling +{ + float4 r; + + const float scale = (4.0f/float(SMAP_size)); +// float4 J0 = jitter0.Sample( smp_jitter, tcJ )*scale; +// float4 J1 = jitter1.Sample( smp_jitter, tcJ )*scale; + float4 J0 = tex2D( jitter0, tcJ )*scale; + float4 J1 = tex2D( jitter1, tcJ )*scale; + + r.x = test (tc,J0.xy).x; + r.y = test (tc,J0.wz).y; + r.z = test (tc,J1.xy).z; + r.w = test (tc,J1.wz).x; + +// float4 J0 = jitterMipped.Sample( smp_base, tcJ )*scale; + +// r.x = test (tc,J0.xy).x; +// r.y = test (tc,J0.wz).y; +// r.z = 
test (tc,J0.yz).z; +// r.w = test (tc,J0.xw).x; + + return dot(r,float4(1.f/4.f)); +} + +////////////////////////////////////////////////////////////////////////////////////////// +#ifdef USE_SUNMASK +float3x4 m_sunmask; // ortho-projection +float sunmask( float4 P ) +{ + float2 tc = mul( m_sunmask, P ); // + return tex2D( s_lmap, tc ).w; // A8 +} +#else +float sunmask( float4 P ) { return 1.f; } // +#endif +////////////////////////////////////////////////////////////////////////////////////////// +uniform float4x4 m_shadow; + +#endif \ No newline at end of file diff --git a/res/gamedata/shaders/gl/shared/common.h b/res/gamedata/shaders/gl/shared/common.h index 460e3c2606f..00bf7e045df 100644 --- a/res/gamedata/shaders/gl/shared/common.h +++ b/res/gamedata/shaders/gl/shared/common.h @@ -42,6 +42,8 @@ void sincos(float x, out float s, out float c) { s = sin(x); c = cos(x); } #define tex2Dlod(s,t) textureLod(s,t.xy,t.w) #define tex3D texture #define texCUBE texture +#define asuint floatBitsToUint +#define asfloat uintBitsToFloat // Semantics assignment, maximum 16 slots #define COLOR 0 diff --git a/src/Layers/xrRenderGL/glState.cpp b/src/Layers/xrRenderGL/glState.cpp index da68bca45b0..724c24aa265 100644 --- a/src/Layers/xrRenderGL/glState.cpp +++ b/src/Layers/xrRenderGL/glState.cpp @@ -247,6 +247,12 @@ void glState::UpdateSamplerState(u32 stage, u32 name, u32 value) case D3DSAMP_MAXANISOTROPY: /* DWORD maximum anisotropy */ CHK_GL(glSamplerParameteri(m_samplerArray[stage], GL_TEXTURE_MAX_ANISOTROPY_EXT, value)); break; + case XRDX10SAMP_COMPARISONFILTER: + CHK_GL(glSamplerParameteri(m_samplerArray[stage], GL_TEXTURE_COMPARE_MODE, value ? (GLint)GL_COMPARE_REF_TO_TEXTURE : (GLint)GL_NONE)); + break; + case XRDX10SAMP_COMPARISONFUNC: + CHK_GL(glSamplerParameteri(m_samplerArray[stage], GL_TEXTURE_COMPARE_FUNC, value)); + break; default: // Assume this is an OpenGL sampler parameter CHK_GL(glSamplerParameteri(m_samplerArray[stage], (GLenum)name, value));