diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4f986b94d..3f3d51883 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -359,6 +359,12 @@ jobs: arch_gnu: powerpc64le arch_deb: ppc64el distro: ubuntu-22.04 + - version: 12 + cross: mips64el + arch_gnu: mips64el + arch_gnu_abi: abi64 + arch_deb: mips64el + distro: ubuntu-22.04 runs-on: ${{ matrix.distro }} steps: - uses: actions/checkout@v3 @@ -427,6 +433,12 @@ jobs: arch_deb: ppc64el arch_gnu: powerpc64le distro: ubuntu-22.04 + - version: 16 + cross: mips64el + arch_deb: mips64el + arch_gnu: mips64el + arch_gnu_abi: abi64 + distro: ubuntu-22.04 runs-on: ${{ matrix.distro }} steps: - uses: actions/checkout@v3 diff --git a/docker/cross-files/mips64el-clang-15.cross b/docker/cross-files/mips64el-clang-15.cross index 891f65268..c9a7f9a1d 100644 --- a/docker/cross-files/mips64el-clang-15.cross +++ b/docker/cross-files/mips64el-clang-15.cross @@ -4,7 +4,7 @@ cpp = 'clang++-15' ar = 'llvm-ar-15' strip = 'llvm-strip-15' objcopy = 'llvm-objcopy-15' -ld = 'llvm-ld-16' +ld = 'llvm-ld-15' exe_wrapper = ['qemu-mips64el-static', '-L', '/usr/mips64el-linux-gnuabi64'] [properties] diff --git a/docker/cross-files/mips64el-clang-16-ccache.cross b/docker/cross-files/mips64el-clang-16-ccache.cross new file mode 100644 index 000000000..a13ad258b --- /dev/null +++ b/docker/cross-files/mips64el-clang-16-ccache.cross @@ -0,0 +1,20 @@ +[binaries] +c = ['ccache', 'clang-16'] +cpp = ['ccache', 'clang++-16'] +ar = 'llvm-ar-16' +strip = 'llvm-strip-16' +objcopy = 'llvm-objcopy-16' +ld = 'llvm-ld-16' +exe_wrapper = ['qemu-mips64el-static', '-cpu', 'Loongson-3A4000', '-L', '/usr/mips64el-linux-gnuabi64'] + +[properties] +c_args = ['--target=mips64el-linux-gnuabi64', '-march=mips64r5', '-mmsa', '-isystem=/usr/mips64el-linux-gnuabi64/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror', '-Wno-unsafe-buffer-usage'] +cpp_args = ['--target=mips64el-linux-gnuabi64', '-march=mips64r5', '-mmsa',
'-isystem=/usr/mips64el-linux-gnuabi64/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror', '-Wno-unsafe-buffer-usage'] +c_link_args = ['--target=mips64el-linux-gnuabi64'] +cpp_link_args = ['--target=mips64el-linux-gnuabi64'] + +[host_machine] +system = 'linux' +cpu_family = 'mips64el' +cpu = 'mips64el' +endian = 'little' diff --git a/docker/cross-files/mips64el-clang-16.cross b/docker/cross-files/mips64el-clang-16.cross new file mode 100644 index 000000000..026b4ba9d --- /dev/null +++ b/docker/cross-files/mips64el-clang-16.cross @@ -0,0 +1,20 @@ +[binaries] +c = 'clang-16' +cpp = 'clang++-16' +ar = 'llvm-ar-16' +strip = 'llvm-strip-16' +objcopy = 'llvm-objcopy-16' +ld = 'llvm-ld-16' +exe_wrapper = ['qemu-mips64el-static', '-cpu', 'Loongson-3A4000', '-L', '/usr/mips64el-linux-gnuabi64'] + +[properties] +c_args = ['--target=mips64el-linux-gnuabi64', '-march=mips64r5', '-mmsa', '-isystem=/usr/mips64el-linux-gnuabi64/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror', '-Wno-unsafe-buffer-usage'] +cpp_args = ['--target=mips64el-linux-gnuabi64', '-march=mips64r5', '-mmsa', '-isystem=/usr/mips64el-linux-gnuabi64/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror', '-Wno-unsafe-buffer-usage'] +c_link_args = ['--target=mips64el-linux-gnuabi64'] +cpp_link_args = ['--target=mips64el-linux-gnuabi64'] + +[host_machine] +system = 'linux' +cpu_family = 'mips64el' +cpu = 'mips64el' +endian = 'little' diff --git a/docker/cross-files/mips64el-gcc-12-ccache.cross b/docker/cross-files/mips64el-gcc-12-ccache.cross new file mode 100644 index 000000000..da7b49436 --- /dev/null +++ b/docker/cross-files/mips64el-gcc-12-ccache.cross @@ -0,0 +1,18 @@ +[binaries] +c = ['ccache', 'mips64el-linux-gnuabi64-gcc-12'] +cpp = ['ccache', 'mips64el-linux-gnuabi64-g++-12'] +ar = 'mips64el-linux-gnuabi64-ar' +strip = 'mips64el-linux-gnuabi64-strip' +objcopy = 'mips64el-linux-gnuabi64-objcopy' +ld = 'mips64el-linux-gnuabi64-ld' +exe_wrapper = 
['qemu-mips64el-static', '-L', '/usr/mips64el-linux-gnuabi64'] + +[properties] +c_args = ['-mmsa', '-Wextra', '-Werror'] +cpp_args = ['-mmsa', '-Wextra', '-Werror'] + +[host_machine] +system = 'linux' +cpu_family = 'mips64el' +cpu = 'mips64el' +endian = 'little' diff --git a/simde/mips/msa/ld.h b/simde/mips/msa/ld.h index 9f17dbfb8..62662e6b6 100644 --- a/simde/mips/msa/ld.h +++ b/simde/mips/msa/ld.h @@ -37,16 +37,15 @@ SIMDE_FUNCTION_ATTRIBUTES simde_v16i8 simde_msa_ld_b(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return __msa_ld_b(rs, s10); - #else - simde_v16i8 r; + simde_v16i8 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_ld_b(rs, s10) __msa_ld_b((rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_ld_b #define __msa_ld_b(rs, s10) simde_msa_ld_b((rs), (s10)) @@ -57,16 +56,15 @@ simde_v8i16 simde_msa_ld_h(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int16_t)) == 0, "`s10' must be a multiple of sizeof(int16_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return __msa_ld_h(rs, s10); - #else - simde_v8i16 r; + simde_v8i16 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_ld_h(rs, s10) __msa_ld_h((rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_ld_h #define __msa_ld_h(rs, s10) simde_msa_ld_h((rs), (s10)) @@ -77,16 +75,15 @@ simde_v4i32 simde_msa_ld_w(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % 
sizeof(int32_t)) == 0, "`s10' must be a multiple of sizeof(int32_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return __msa_ld_w(rs, s10); - #else - simde_v4i32 r; + simde_v4i32 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_ld_w(rs, s10) __msa_ld_w((rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_ld_w #define __msa_ld_w(rs, s10) simde_msa_ld_w((rs), (s10)) @@ -97,16 +94,15 @@ simde_v2i64 simde_msa_ld_d(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return __msa_ld_d(rs, s10); - #else - simde_v2i64 r; + simde_v2i64 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_ld_d(rs, s10) __msa_ld_d((rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_ld_d #define __msa_ld_d(rs, s10) simde_msa_ld_d((rs), (s10)) @@ -116,96 +112,90 @@ SIMDE_FUNCTION_ATTRIBUTES simde_v16u8 simde_x_msa_ld_u_b(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return HEDLEY_REINTERPRET_CAST(simde_v16u8, __msa_ld_b(rs, s10)); - #else - simde_v16u8 r; + simde_v16u8 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_x_msa_ld_u_b(rs, s10) HEDLEY_REINTERPRET_CAST(simde_v16u8, __msa_ld_b((rs), 
(s10))) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_v8u16 simde_x_msa_ld_u_h(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int16_t)) == 0, "`s10' must be a multiple of sizeof(int16_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return HEDLEY_REINTERPRET_CAST(simde_v8u16, __msa_ld_b(rs, s10)); - #else - simde_v8u16 r; + simde_v8u16 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_x_msa_ld_u_h(rs, s10) HEDLEY_REINTERPRET_CAST(simde_v8u16, __msa_ld_b((rs), (s10))) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_v4u32 simde_x_msa_ld_u_w(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int32_t)) == 0, "`s10' must be a multiple of sizeof(int32_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return HEDLEY_REINTERPRET_CAST(simde_v4u32, __msa_ld_b(rs, s10)); - #else - simde_v4u32 r; + simde_v4u32 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_x_msa_ld_u_w(rs, s10) HEDLEY_REINTERPRET_CAST(simde_v4u32, __msa_ld_b((rs), (s10))) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_v2u64 simde_x_msa_ld_u_d(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return HEDLEY_REINTERPRET_CAST(simde_v2u64, __msa_ld_b(rs, s10)); - #else - simde_v2u64 r; + simde_v2u64 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), 
sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_x_msa_ld_u_d(rs, s10) HEDLEY_REINTERPRET_CAST(simde_v2u64, __msa_ld_b((rs), (s10))) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_v4f32 simde_x_msa_fld_w(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int32_t)) == 0, "`s10' must be a multiple of sizeof(int32_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return HEDLEY_REINTERPRET_CAST(simde_v4f32, __msa_ld_b(rs, s10)); - #else - simde_v4f32 r; + simde_v4f32 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_x_msa_fld_w(rs, s10) HEDLEY_REINTERPRET_CAST(simde_v4f32, __msa_ld_b((rs), (s10))) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_v2f64 simde_x_msa_fld_d(const void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return HEDLEY_REINTERPRET_CAST(simde_v2f64, __msa_ld_b(rs, s10)); - #else - simde_v2f64 r; + simde_v2f64 r; - simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); - return r; - #endif + return r; } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_x_msa_fld_d(rs, s10) HEDLEY_REINTERPRET_CAST(simde_v2f64, __msa_ld_b((rs), (s10))) +#endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP diff --git a/simde/mips/msa/st.h b/simde/mips/msa/st.h index 2c5b28833..9565c84ee 100644 --- a/simde/mips/msa/st.h +++ b/simde/mips/msa/st.h @@ -37,12 +37,11 @@ SIMDE_FUNCTION_ATTRIBUTES void simde_msa_st_b(simde_v16i8 a, void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) { - #if defined(SIMDE_MIPS_MSA_NATIVE) 
- return __msa_st_b(a, rs, s10); - #else - simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); - #endif + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_st_b(a, rs, s10) __msa_st_b((a), (rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_st_b #define __msa_st_b(a, rs, s10) simde_msa_st_b((a), (rs), (s10)) @@ -53,12 +52,11 @@ void simde_msa_st_h(simde_v8i16 a, void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int16_t)) == 0, "`s10' must be a multiple of sizeof(int16_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return __msa_st_h(a, rs, s10); - #else - simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); - #endif + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_st_h(a, rs, s10) __msa_st_h((a), (rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_st_h #define __msa_st_h(a, rs, s10) simde_msa_st_h((a), (rs), (s10)) @@ -69,12 +67,11 @@ void simde_msa_st_w(simde_v4i32 a, void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) HEDLEY_REQUIRE_MSG((s10 % sizeof(int32_t)) == 0, "`s10' must be a multiple of sizeof(int32_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return __msa_st_w(a, rs, s10); - #else - simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); - #endif + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_st_w(a, rs, s10) __msa_st_w((a), (rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_st_w #define __msa_st_w(a, rs, s10) simde_msa_st_w((a), (rs), (s10)) @@ -85,12 +82,11 @@ void simde_msa_st_d(simde_v2i64 a, void * rs, const int s10) SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023)
HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { - #if defined(SIMDE_MIPS_MSA_NATIVE) - return __msa_st_d(a, rs, s10); - #else - simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); - #endif + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); } +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_st_d(a, rs, s10) __msa_st_d((a), (rs), (s10)) +#endif #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_st_d #define __msa_st_d(a, rs, s10) simde_msa_st_d((a), (rs), (s10)) diff --git a/simde/simde-f16.h b/simde/simde-f16.h index 05877b911..632ef626a 100644 --- a/simde/simde-f16.h +++ b/simde/simde-f16.h @@ -69,6 +69,7 @@ SIMDE_BEGIN_DECLS_ #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ + !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ defined(SIMDE_X86_AVX512FP16_NATIVE) || \ (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \