diff --git a/src/tcompiler.cpp b/src/tcompiler.cpp
index 1084845d..c0f920a0 100644
--- a/src/tcompiler.cpp
+++ b/src/tcompiler.cpp
@@ -228,6 +228,27 @@ bool HostHasAVX() {
     return Features["avx"];
 }
 
+// Returns true only when the host CPU reports every AVX-512 subset listed in
+// instruction_set below; a single missing subset yields false.
+bool HostHasAVX512() {
+    StringMap<bool> Features;
+    sys::getHostCPUFeatures(Features);
+    // The following instruction sets are supported by Intel CPUs starting 2017
+    // (Skylake-SP and beyond) and AMD CPUs starting 2022 (Zen4 and beyond)
+    const char *instruction_set[] = {
+            "avx512f",   // Foundation
+            "avx512dq",  // Double Word, Quad Word
+            "avx512bw",  // Vector Byte and Word
+            "avx512vl",  // Vector Length
+            "avx512cd",  // Conflict Detection
+    };
+    bool has_avx512 = true;
+    for (auto &instr : instruction_set) {
+        has_avx512 &= Features[instr];
+    }
+    return has_avx512;
+}
+
 int terra_inittarget(lua_State *L) {
     terra_State *T = terra_getstate(L, 1);
     TerraTarget *TT = new TerraTarget();
@@ -257,7 +278,13 @@ int terra_inittarget(lua_State *L) {
 #ifdef DISABLE_AVX
         TT->Features = "-avx";
 #else
-        TT->Features = HostHasAVX() ? "+avx" : "";
+        if (HostHasAVX512()) {
+            TT->Features = "+avx512f,+avx512dq,+avx512bw,+avx512vl,+avx512cd";
+        } else if (HostHasAVX()) {
+            TT->Features = "+avx";
+        } else {
+            TT->Features = "";
+        }
 #endif
     }
 
diff --git a/tests/avx512.t b/tests/avx512.t
new file mode 100644
index 00000000..fe1c90d9
--- /dev/null
+++ b/tests/avx512.t
@@ -0,0 +1,8 @@
+local vec = vector(int8, 64)
+terra compute(x: vec, y: vec)
+    -- vec has a size of 8 * 64 = 512 bits so this operation should compile
+    -- to a single AVX512 instruction
+    return x + y
+end
+compute:compile()
+compute:disas()