Skip to content

Commit

Permalink
Merge pull request #170 from SaiyansKing/master
Browse files Browse the repository at this point in the history
Merge latest changes from @SaiyansKing

Merge before trying to incorporate other sources (@lucifer602288 and/or @Shoun2137)

Fixes rendering of skeletal mesh particle effects (like Undead Dragon particles in G2)
fixes #169 (fully or in part)
  • Loading branch information
kirides authored Sep 12, 2024
2 parents 053172f + 4046b4e commit 64a70c2
Show file tree
Hide file tree
Showing 21 changed files with 459 additions and 131 deletions.
6 changes: 2 additions & 4 deletions D3D11Engine/D2DSettingsDialog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,13 +370,11 @@ XRESULT D2DSettingsDialog::InitControls() {

SV_Slider* visualFXDDSlider = new SV_Slider( MainView, MainPanel );
visualFXDDSlider->SetPositionAndSize( D2D1::Point2F( 10, 22 ), D2D1::SizeF( 150, 15 ) );

visualFXDDSlider->AlignUnder( visualFXDDLabel, 5 );

visualFXDDSlider->AlignUnder( visualFXDDLabel, 5 );
visualFXDDSlider->SetDataToUpdate( &Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius );
visualFXDDSlider->SetIsIntegralSlider( true );
visualFXDDSlider->SetDisplayMultiplier( 0.001f );
visualFXDDSlider->SetMinMax( 0.0f, 30000.0f );
visualFXDDSlider->SetMinMax( 0.0f, 10000.0f );
visualFXDDSlider->SetValue( Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius );

SV_Label* worldDDLabel = new SV_Label( MainView, MainPanel );
Expand Down
17 changes: 9 additions & 8 deletions D3D11Engine/D3D11Effect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ D3D11Effect::~D3D11Effect() {
HRESULT LoadTextureArray( Microsoft::WRL::ComPtr<ID3D11Device1> pd3dDevice, Microsoft::WRL::ComPtr<ID3D11DeviceContext1> context, char* sTexturePrefix, int iNumTextures, ID3D11Texture2D** ppTex2D, ID3D11ShaderResourceView** ppSRV );

/** Fills a vector of random raindrop data */
void D3D11Effect::FillRandomRaindropData( std::vector<ParticleInstanceInfo>& data ) {
void D3D11Effect::FillRandomRaindropData( std::vector<RainParticleInstanceInfo>& data ) {
/** Base taken from Nvidias Rain-Sample **/

float radius = Engine::GAPI->GetRendererState().RendererSettings.RainRadiusRange;
float height = Engine::GAPI->GetRendererState().RendererSettings.RainHeightRange;

for ( size_t i = 0; i < data.size(); i++ ) {
ParticleInstanceInfo raindrop;
RainParticleInstanceInfo raindrop;
//use rejection sampling to generate random points inside a circle of radius 1 centered at 0, 0
float SeedX;
float SeedZ;
Expand Down Expand Up @@ -118,15 +118,15 @@ XRESULT D3D11Effect::DrawRain() {
e->CreateVertexBuffer( &RainBufferInitial );

UINT numParticles = Engine::GAPI->GetRendererState().RendererSettings.RainNumParticles;
std::vector<ParticleInstanceInfo> particles( numParticles );
std::vector<RainParticleInstanceInfo> particles( numParticles );

// Fill the vector with random raindrop data
FillRandomRaindropData( particles );

// Create vertexbuffers
RainBufferInitial->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferInitial" );
RainBufferDrawFrom->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferDrawFrom" );
RainBufferStreamTo->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferStreamTo" );
RainBufferInitial->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferInitial" );
RainBufferDrawFrom->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferDrawFrom" );
RainBufferStreamTo->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferStreamTo" );

firstFrame = true;

Expand All @@ -146,7 +146,7 @@ XRESULT D3D11Effect::DrawRain() {

firstFrame = false;

UINT stride = sizeof( ParticleInstanceInfo );
UINT stride = sizeof( RainParticleInstanceInfo );
UINT offset = 0;

// Bind buffer to draw from last frame
Expand All @@ -156,6 +156,7 @@ XRESULT D3D11Effect::DrawRain() {
e->GetContext()->SOSetTargets( 1, RainBufferStreamTo->GetVertexBuffer().GetAddressOf(), &offset );

// Apply shaders
e->GetContext()->PSSetShader( nullptr, nullptr, 0 );
particleAdvanceVS->Apply();
streamOutGS->Apply();

Expand Down Expand Up @@ -236,7 +237,7 @@ XRESULT D3D11Effect::DrawRain() {
e->GetContext()->PSSetShaderResources( 0, 1, RainTextureArraySRV.GetAddressOf() );

// Draw the vertexbuffer
e->DrawVertexBuffer( RainBufferDrawFrom, numParticles, sizeof( ParticleInstanceInfo ) );
e->DrawVertexBuffer( RainBufferDrawFrom, numParticles, sizeof( RainParticleInstanceInfo ) );

// Reset this
e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST );
Expand Down
2 changes: 1 addition & 1 deletion D3D11Engine/D3D11Effect.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class D3D11Effect {
protected:

/** Fills a vector of random raindrop data */
void FillRandomRaindropData( std::vector<ParticleInstanceInfo>& data );
void FillRandomRaindropData( std::vector<RainParticleInstanceInfo>& data );

/** Rain */
D3D11VertexBuffer* RainBufferInitial;
Expand Down
6 changes: 1 addition & 5 deletions D3D11Engine/D3D11GraphicsEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ XRESULT D3D11GraphicsEngine::OnResize( INT2 newSize ) {
GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R16G16_FLOAT );

GBuffer1_Normals = std::make_unique<RenderToTextureBuffer>(
GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R16G16B16A16_FLOAT );
GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R8G8B8A8_SNORM );

GBuffer0_Diffuse = std::make_unique<RenderToTextureBuffer>(
GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_B8G8R8A8_UNORM );
Expand Down Expand Up @@ -5692,10 +5692,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector<zCVob*>& decals,

XMMATRIX mat = view * world * offset * scale;

ParticleInstanceInfo ii;
ii.scale = float2( 50, 50 );
ii.color = 0xFFFFFFFF;

Engine::GAPI->SetWorldTransformXM( mat );
SetupVS_ExPerInstanceConstantBuffer();

Expand Down
6 changes: 3 additions & 3 deletions D3D11Engine/D3D11ShaderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ XRESULT D3D11ShaderManager::Init() {
Shaders.push_back( ShaderInfo( "VS_ParticlePoint", "VS_ParticlePoint.hlsl", "v", 11 ) );
Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) );

Shaders.push_back( ShaderInfo( "VS_ParticlePointShaded", "VS_ParticlePointShaded.hlsl", "v", 11 ) );
Shaders.push_back( ShaderInfo( "VS_ParticlePointShaded", "VS_ParticlePointShaded.hlsl", "v", 13 ) );
Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) );
Shaders.back().cBufferSizes.push_back( sizeof( ParticlePointShadingConstantBuffer ) );


Shaders.push_back( ShaderInfo( "VS_AdvanceRain", "VS_AdvanceRain.hlsl", "v", 11 ) );
Shaders.push_back( ShaderInfo( "VS_AdvanceRain", "VS_AdvanceRain.hlsl", "v", 13 ) );
Shaders.back().cBufferSizes.push_back( sizeof( AdvanceRainConstantBuffer ) );

Shaders.push_back( ShaderInfo( "VS_Ocean", "VS_Ocean.hlsl", "v", 1 ) );
Expand Down Expand Up @@ -348,7 +348,7 @@ XRESULT D3D11ShaderManager::Init() {
Shaders.push_back( ShaderInfo( "GS_Cubemap", "GS_Cubemap.hlsl", "g" ) );
Shaders.back().cBufferSizes.push_back( sizeof( CubemapGSConstantBuffer ) );

Shaders.push_back( ShaderInfo( "GS_ParticleStreamOut", "VS_AdvanceRain.hlsl", "g", 11 ) );
Shaders.push_back( ShaderInfo( "GS_ParticleStreamOut", "VS_AdvanceRain.hlsl", "g", 13 ) );
Shaders.back().cBufferSizes.push_back( sizeof( ParticleGSInfoConstantBuffer ) );

m.Name = "NORMALMAPPING";
Expand Down
16 changes: 15 additions & 1 deletion D3D11Engine/D3D11VShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "DIFFUSE", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "SIZE", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "SIZE", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TYPE", 0, DXGI_FORMAT_R32_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "VELOCITY", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
Expand All @@ -151,6 +151,15 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st
{ "INSTANCE_REMAP_INDEX", 0, DXGI_FORMAT_R32_UINT, 1, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_INSTANCE_DATA, 1},
};

const D3D11_INPUT_ELEMENT_DESC layout13[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "DIFFUSE", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "SIZE", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TYPE", 0, DXGI_FORMAT_R32_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "VELOCITY", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};

switch ( layout ) {
case 1:
LE( engine->GetDevice()->CreateInputLayout( layout1, ARRAYSIZE( layout1 ), vsBlob->GetBufferPointer(),
Expand Down Expand Up @@ -211,6 +220,11 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st
LE( engine->GetDevice()->CreateInputLayout( layout12, ARRAYSIZE( layout12 ), vsBlob->GetBufferPointer(),
vsBlob->GetBufferSize(), InputLayout.ReleaseAndGetAddressOf() ) );
break;

case 13:
LE( engine->GetDevice()->CreateInputLayout( layout13, ARRAYSIZE( layout13 ), vsBlob->GetBufferPointer(),
vsBlob->GetBufferSize(), InputLayout.ReleaseAndGetAddressOf() ) );
break;
}

return XR_SUCCESS;
Expand Down
193 changes: 193 additions & 0 deletions D3D11Engine/DLLMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ extern "C" {
_declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 0x00000001;
}

/** Half-float conversion entry points.
 * Each pointer is bound in CheckPlatformSupport() to an F16C, SSE4.1 or SSE2/scalar
 * implementation, depending on which instruction sets the CPU reports. */
ZQuantizeHalfFloat QuantizeHalfFloat;
ZQuantizeHalfFloat_X4 QuantizeHalfFloat_X4;
ZUnquantizeHalfFloat UnquantizeHalfFloat;
ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X4;
// Declared with the _X4 pointer type: the X4 and X8 routines defined in this file
// both take ( unsigned short* input, float* output ).
ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X8;

static HINSTANCE hLThis = 0;

typedef void (WINAPI* DirectDrawSimple)();
Expand All @@ -38,6 +44,170 @@ WinMainFunc originalWinMain = reinterpret_cast<WinMainFunc>(GothicMemoryLocation
bool FeatureLevel10Compatibility = false;
bool GMPModeActive = false;

/** Converts a 32-bit float to IEEE 754 half-precision bits using integer math only.
 *
 * - Rounds to the nearest half value (ties round up in magnitude).
 * - Magnitudes below 2^-14 (the smallest normal half) become signed zero.
 * - Magnitudes of 2^16 and above, including infinity, become signed infinity (0x7C00).
 * - NaN becomes the quiet NaN pattern 0x7E00.
 *
 * @param input Value to convert.
 * @return The 16 bit half-float bit pattern. */
unsigned short QuantizeHalfFloat_Scalar( float input )
{
    // Read the bit pattern with memcpy; type punning through a union is undefined behaviour in C++
    unsigned int ui;
    memcpy( &ui, &input, sizeof( ui ) );

    int s = ( ui >> 16 ) & 0x8000;   // sign bit moved to its half-float position
    int em = ui & 0x7fffffff;        // exponent + mantissa without the sign

    // Rebias the exponent (127 -> 15), add half an output ulp for rounding and drop 13 mantissa bits
    int h = ( em - ( 112 << 23 ) + ( 1 << 12 ) ) >> 13;

    // Too small for a normal half: flush to zero
    h = ( em < ( 113 << 23 ) ) ? 0 : h;

    // Too large for a finite half (or infinity): clamp to infinity
    h = ( em >= ( 143 << 23 ) ) ? 0x7c00 : h;

    // NaN: exponent all ones with a non-zero mantissa
    h = ( em > ( 255 << 23 ) ) ? 0x7e00 : h;

    return static_cast<unsigned short>(s | h);
}

/** Converts four floats to four half-floats using SSE2 only.
 *
 * Same rules as the scalar converter: small magnitudes are flushed to zero,
 * large magnitudes become infinity and NaN becomes 0x7E00.
 *
 * @param input  Four floats, loaded with an aligned load (16 byte alignment required).
 * @param output Receives four 16 bit half-float patterns (8 bytes are written). */
void QuantizeHalfFloats_X4_SSE2( float* input, unsigned short* output )
{
    const __m128i rawBits = _mm_castps_si128( _mm_load_ps( input ) );
    const __m128i signBits = _mm_and_si128( _mm_srli_epi32( rawBits, 16 ), _mm_set1_epi32( 0x8000 ) );
    const __m128i magnitude = _mm_and_si128( rawBits, _mm_set1_epi32( 0x7FFFFFFF ) );

    // Rebias the exponent and round; 0x37FFF000 == (112 << 23) - (1 << 12)
    __m128i half = _mm_srli_epi32( _mm_sub_epi32( magnitude, _mm_set1_epi32( 0x37FFF000 ) ), 13 );

    // Below the smallest normal half: clear the lane
    const __m128i underflow = _mm_cmplt_epi32( magnitude, _mm_set1_epi32( 0x38800000 ) );
    half = _mm_andnot_si128( underflow, half );

    // 2^16 and above: infinity
    const __m128i overflow = _mm_cmpgt_epi32( magnitude, _mm_set1_epi32( 0x47800000 - 1 ) );
    half = _mm_or_si128( _mm_andnot_si128( overflow, half ), _mm_and_si128( overflow, _mm_set1_epi32( 0x7C00 ) ) );

    // NaN
    const __m128i notANumber = _mm_cmpgt_epi32( magnitude, _mm_set1_epi32( 0x7F800000 ) );
    half = _mm_or_si128( _mm_andnot_si128( notANumber, half ), _mm_and_si128( notANumber, _mm_set1_epi32( 0x7E00 ) ) );

    // SSE2 only offers a signed saturating pack, so shift the values into int16_t range,
    // pack them and undo the shift with a wrapping 16 bit add
    const __m128i shifted = _mm_sub_epi32( _mm_or_si128( signBits, half ), _mm_set1_epi32( 32768 ) );
    const __m128i packed = _mm_add_epi16( _mm_packs_epi32( shifted, shifted ), _mm_set1_epi16( static_cast<short>(32768) ) );
    _mm_store_sd( reinterpret_cast<double*>(output), _mm_castsi128_pd( packed ) );
}

/** Converts four floats to four half-floats using SSE4.1 blend and unsigned pack.
 *
 * Same rules as the scalar converter: small magnitudes are flushed to zero,
 * large magnitudes become infinity and NaN becomes 0x7E00.
 *
 * @param input  Four floats, loaded with an aligned load (16 byte alignment required).
 * @param output Receives four 16 bit half-float patterns (8 bytes are written). */
void QuantizeHalfFloats_X4_SSE41( float* input, unsigned short* output )
{
    const __m128i rawBits = _mm_castps_si128( _mm_load_ps( input ) );
    const __m128i signBits = _mm_and_si128( _mm_srli_epi32( rawBits, 16 ), _mm_set1_epi32( 0x8000 ) );
    const __m128i magnitude = _mm_and_si128( rawBits, _mm_set1_epi32( 0x7FFFFFFF ) );

    // Rebias the exponent and round; 0x37FFF000 == (112 << 23) - (1 << 12)
    const __m128i rebased = _mm_srli_epi32( _mm_sub_epi32( magnitude, _mm_set1_epi32( 0x37FFF000 ) ), 13 );

    // Lane classification
    const __m128i underflow = _mm_cmplt_epi32( magnitude, _mm_set1_epi32( 0x38800000 ) );
    const __m128i overflow = _mm_cmpgt_epi32( magnitude, _mm_set1_epi32( 0x47800000 - 1 ) );
    const __m128i notANumber = _mm_cmpgt_epi32( magnitude, _mm_set1_epi32( 0x7F800000 ) );

    // Later selections win, so NaN overrides infinity which overrides the regular result
    __m128i half = _mm_blendv_epi8( rebased, _mm_setzero_si128(), underflow );
    half = _mm_blendv_epi8( half, _mm_set1_epi32( 0x7C00 ), overflow );
    half = _mm_blendv_epi8( half, _mm_set1_epi32( 0x7E00 ), notANumber );

    // Values are within 0..0xFFFF, so the unsigned saturating pack keeps them unchanged
    const __m128i combined = _mm_or_si128( signBits, half );
    _mm_store_sd( reinterpret_cast<double*>(output), _mm_castsi128_pd( _mm_packus_epi32( combined, combined ) ) );
}

#ifdef _XM_AVX_INTRINSICS_
/** Converts one float to half-float bits with the F16C conversion instruction.
 * Rounding follows the current MXCSR rounding mode.
 *
 * @param input Value to convert.
 * @return The 16 bit half-float bit pattern. */
unsigned short QuantizeHalfFloat_F16C( float input )
{
    const __m128 value = _mm_set_ss( input );
    const __m128i half = _mm_cvtps_ph( value, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC );

    // Only the lowest 16 bits carry the converted value
    return static_cast<unsigned short>(_mm_cvtsi128_si32( half ));
}

/** Converts four floats to four half-floats with the F16C conversion instruction.
 * Rounding follows the current MXCSR rounding mode.
 *
 * @param input  Four floats, loaded with an aligned load (16 byte alignment required).
 * @param output Receives four 16 bit half-float patterns (8 bytes are written). */
void QuantizeHalfFloats_X4_F16C( float* input, unsigned short* output )
{
    const __m128 values = _mm_load_ps( input );
    const __m128i halfs = _mm_cvtps_ph( values, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC );

    // The four results sit in the low 64 bits of the register
    _mm_store_sd( reinterpret_cast<double*>(output), _mm_castsi128_pd( halfs ) );
}
#endif

/** Converts IEEE 754 half-precision bits to a 32-bit float using integer math only.
 *
 * - Normal values are converted exactly.
 * - Zero and subnormal inputs yield signed zero (mirrors QuantizeHalfFloat_Scalar,
 *   which never emits subnormals).
 * - Infinity and NaN stay infinity and NaN.
 *
 * @param input The 16 bit half-float bit pattern.
 * @return The decoded value. */
float UnquantizeHalfFloat_Scalar( unsigned short input )
{
    const unsigned int s = static_cast<unsigned int>(input & 0x8000) << 16;
    const unsigned int em = input & 0x7FFF;   // exponent + mantissa without the sign

    // Rebias the exponent (15 -> 127) and move exponent/mantissa to their float positions
    unsigned int r = ( em + 0x0001C000 ) << 13;

    if ( em < 0x0400 ) {
        // Exponent field is zero: without this the rebias alone would decode 0 as 2^-15
        r = 0;
    } else if ( em >= 0x7C00 ) {
        // Exponent field is all ones: apply the bias a second time so 31 becomes 255 (Inf/NaN)
        r += 0x38000000;
    }

    r |= s;

    float out;
    memcpy( &out, &r, sizeof( float ) );
    return out;
}

/** Converts four half-floats to four floats using SSE2 integer math.
 *
 * - Normal values are converted exactly.
 * - Zero and subnormal inputs yield signed zero.
 * - Infinity and NaN stay infinity and NaN.
 *
 * @param input  Four 16 bit half-float patterns (8 bytes are read).
 * @param output Receives four floats via an aligned store (16 byte alignment required). */
void UnquantizeHalfFloat_X4_SSE2( unsigned short* input, float* output )
{
    const __m128i mask_zero = _mm_setzero_si128();
    const __m128i mask_s = _mm_set1_epi32( 0x8000 );
    const __m128i mask_em = _mm_set1_epi32( 0x7FFF );
    const __m128i bias_e = _mm_set1_epi32( 0x0001C000 );        // (127 - 15) << 10
    const __m128i min_normal = _mm_set1_epi32( 0x0400 );        // smallest normal half
    const __m128i max_finite = _mm_set1_epi32( 0x7BFF );        // largest finite half
    const __m128i infnan_fixup = _mm_set1_epi32( 0x38000000 );  // (127 - 15) << 23

    // Zero-extend the four halfs into 32 bit lanes
    const __m128i halfs = _mm_unpacklo_epi16( _mm_loadl_epi64( reinterpret_cast<const __m128i*>(input) ), mask_zero );

    const __m128i s4 = _mm_slli_epi32( _mm_and_si128( halfs, mask_s ), 16 );
    const __m128i em4 = _mm_and_si128( halfs, mask_em );

    // Rebias the exponent and move exponent/mantissa to their float positions
    __m128i r4 = _mm_slli_epi32( _mm_add_epi32( em4, bias_e ), 13 );

    // Exponent field zero: without this the rebias alone would decode 0 as 2^-15
    r4 = _mm_andnot_si128( _mm_cmplt_epi32( em4, min_normal ), r4 );

    // Exponent field all ones: apply the bias a second time so 31 becomes 255 (Inf/NaN)
    r4 = _mm_add_epi32( r4, _mm_and_si128( _mm_cmpgt_epi32( em4, max_finite ), infnan_fixup ) );

    _mm_store_si128( reinterpret_cast<__m128i*>(output), _mm_or_si128( s4, r4 ) );
}

/** Converts eight half-floats to eight floats using SSE2 integer math.
 *
 * - Normal values are converted exactly.
 * - Zero and subnormal inputs yield signed zero.
 * - Infinity and NaN stay infinity and NaN.
 *
 * @param input  Eight 16 bit half-float patterns, read with an aligned load (16 byte alignment required).
 * @param output Receives eight floats via two aligned stores (16 byte alignment required). */
void UnquantizeHalfFloat_X8_SSE2( unsigned short* input, float* output )
{
    const __m128i mask_zero = _mm_setzero_si128();
    const __m128i mask_s = _mm_set1_epi32( 0x8000 );
    const __m128i mask_em = _mm_set1_epi32( 0x7FFF );
    const __m128i bias_e = _mm_set1_epi32( 0x0001C000 );        // (127 - 15) << 10
    const __m128i min_normal = _mm_set1_epi32( 0x0400 );        // smallest normal half
    const __m128i max_finite = _mm_set1_epi32( 0x7BFF );        // largest finite half
    const __m128i infnan_fixup = _mm_set1_epi32( 0x38000000 );  // (127 - 15) << 23

    const __m128i halfs = _mm_load_si128( reinterpret_cast<const __m128i*>(input) );

    // Lower four values: zero-extend into 32 bit lanes
    __m128i h4 = _mm_unpacklo_epi16( halfs, mask_zero );
    __m128i s4 = _mm_slli_epi32( _mm_and_si128( h4, mask_s ), 16 );
    __m128i em4 = _mm_and_si128( h4, mask_em );

    // Rebias the exponent and move exponent/mantissa to their float positions
    __m128i r4 = _mm_slli_epi32( _mm_add_epi32( em4, bias_e ), 13 );
    // Exponent field zero: without this the rebias alone would decode 0 as 2^-15
    r4 = _mm_andnot_si128( _mm_cmplt_epi32( em4, min_normal ), r4 );
    // Exponent field all ones: apply the bias a second time so 31 becomes 255 (Inf/NaN)
    r4 = _mm_add_epi32( r4, _mm_and_si128( _mm_cmpgt_epi32( em4, max_finite ), infnan_fixup ) );

    _mm_store_si128( reinterpret_cast<__m128i*>(output + 0), _mm_or_si128( s4, r4 ) );

    // Upper four values: same steps on the high half of the register
    h4 = _mm_unpackhi_epi16( halfs, mask_zero );
    s4 = _mm_slli_epi32( _mm_and_si128( h4, mask_s ), 16 );
    em4 = _mm_and_si128( h4, mask_em );

    r4 = _mm_slli_epi32( _mm_add_epi32( em4, bias_e ), 13 );
    r4 = _mm_andnot_si128( _mm_cmplt_epi32( em4, min_normal ), r4 );
    r4 = _mm_add_epi32( r4, _mm_and_si128( _mm_cmpgt_epi32( em4, max_finite ), infnan_fixup ) );

    _mm_store_si128( reinterpret_cast<__m128i*>(output + 4), _mm_or_si128( s4, r4 ) );
}

#ifdef _XM_AVX_INTRINSICS_
/** Converts one half-float bit pattern to a float with the F16C conversion instruction.
 *
 * @param input The 16 bit half-float bit pattern.
 * @return The decoded value. */
float UnquantizeHalfFloat_F16C( unsigned short input )
{
    // Place the half in the lowest lane; the remaining lanes are zero and ignored
    const __m128i half = _mm_cvtsi32_si128( input );
    const __m128 value = _mm_cvtph_ps( half );
    return _mm_cvtss_f32( value );
}

/** Converts four half-floats to four floats with the F16C conversion instruction.
 *
 * @param input  Four 16 bit half-float patterns (8 bytes are read).
 * @param output Receives four floats via an aligned store (16 byte alignment required). */
void UnquantizeHalfFloat_X4_F16C( unsigned short* input, float* output )
{
    const __m128i halfs = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(input) );
    const __m128 values = _mm_cvtph_ps( halfs );
    _mm_store_ps( output, values );
}

/** Converts eight half-floats to eight floats with the F16C conversion instruction.
 *
 * @param input  Eight 16 bit half-float patterns, read with an aligned load (16 byte alignment required).
 * @param output Receives eight floats; no particular alignment is required. */
void UnquantizeHalfFloat_X8_F16C( unsigned short* input, float* output )
{
    const __m128i halfs = _mm_load_si128( reinterpret_cast<const __m128i*>(input) );

    // Unaligned store on purpose: the SSE2 routine bound to the same function pointer only
    // needs 16 byte aligned output, while an aligned 256 bit store would fault unless the
    // buffer is 32 byte aligned
    _mm256_storeu_ps( output, _mm256_cvtph_ps( halfs ) );
}
#endif

void SignalHandler( int signal ) {
LogInfo() << "Signal:" << signal;
throw "!Access Violation!";
Expand Down Expand Up @@ -206,6 +376,29 @@ void CheckPlatformSupport() {
#elif __SSE__
support_message( "SSE", InstructionSet::SSE() );
#endif

#ifdef _XM_AVX_INTRINSICS_
if ( InstructionSet::F16C() ) {
QuantizeHalfFloat = QuantizeHalfFloat_F16C;
QuantizeHalfFloat_X4 = QuantizeHalfFloats_X4_F16C;
UnquantizeHalfFloat = UnquantizeHalfFloat_F16C;
UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_F16C;
UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_F16C;
} else
#endif
if ( InstructionSet::SSE41() ) {
QuantizeHalfFloat = QuantizeHalfFloat_Scalar;
QuantizeHalfFloat_X4 = QuantizeHalfFloats_X4_SSE41;
UnquantizeHalfFloat = UnquantizeHalfFloat_Scalar;
UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_SSE2;
UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_SSE2;
} else {
QuantizeHalfFloat = QuantizeHalfFloat_Scalar;
QuantizeHalfFloat_X4 = QuantizeHalfFloats_X4_SSE2;
UnquantizeHalfFloat = UnquantizeHalfFloat_Scalar;
UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_SSE2;
UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_SSE2;
}
}

#if defined(BUILD_GOTHIC_2_6_fix)
Expand Down
Loading

3 comments on commit 64a70c2

@lucifer602288
Copy link
Collaborator

@lucifer602288 lucifer602288 commented on 64a70c2 Sep 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did include the changes from Shoun2137 into my GD3D11 repository which was before based on your GD3D11 repository.
The build.yml I did adapt and it compiles but the artifacts push with copying the dlls does not work.
So you can take commits for your GD3D11 repository. Maybe you find the wrong path in the artifacts push.

@kirides
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likely a missing artifact upload in the release.yml

I'm not yet keen on integrating @Shoun2137's commits, as a whole lot of things changed there,
and I can't currently make sure that no "bad code" made its way into it. There are a lot of changes in a single huge commit.

@lucifer602288
Copy link
Collaborator

@lucifer602288 lucifer602288 commented on 64a70c2 Sep 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

artifacts are now corrected

@Shoun2137's commits would not run on Windows 7, as "DisplayFlip = true" is the default.
Windows 7 users would complain and not know what to set. I would suggest an OS detection that decides whether DisplayFlip is enabled or not.
Disabled tessellation was deleted.
The UI was changed; there is a known mouse pointer bug.
The editor was removed.

Please sign in to comment.