Skip to content

Commit

Permalink
Use intrinsics for all sifive_x280 kernels (#822)
Browse files: browse the repository at this point in the history.
Details:
- Replace all assembly kernels in the `sifive_x280` kernel set with intrinsic versions.
- Fix the bug encountered in #805.
- Update the RISC-V toolchain used in CI testing.
- Special thanks to Michael Yeh (@myeh01) and SiFive.
  • Loading branch information
myeh01 authored Nov 29, 2024
1 parent 827c50b commit 50b7117
Show file tree
Hide file tree
Showing 62 changed files with 6,902 additions and 12,931 deletions.
106 changes: 53 additions & 53 deletions config/sifive_x280/bli_cntx_init_sifive_x280.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -54,10 +54,10 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_x280_intr,
BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_x280_intr,

BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_asm,
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_asm,
BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_asm,
BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_asm,
BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_intr,
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_intr,
BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_intr,
BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_intr,

BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_x280_intr,
BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_x280_intr,
Expand All @@ -69,10 +69,10 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_x280_intr,
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_x280_intr,

BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_asm,
BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_asm,
BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_asm,
BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_asm,
BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_intr,
BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_intr,
BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_intr,
BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_intr,

BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_x280_intr,
BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_x280_intr,
Expand All @@ -84,15 +84,15 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
BLIS_DOTXV_KER, BLIS_SCOMPLEX, bli_cdotxv_sifive_x280_intr,
BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_x280_intr,

BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_asm,
BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_asm,
BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_asm,
BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_asm,
BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_intr,
BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_intr,
BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_intr,
BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_intr,

BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_asm,
BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_asm,
BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_asm,
BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_asm,
BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_intr,
BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_intr,
BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_intr,
BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_intr,

BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_x280_intr,
BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_x280_intr,
Expand All @@ -104,20 +104,20 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_x280_intr,
BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_x280_intr,

BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_asm,
BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_asm,
BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_asm,
BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_asm,
BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_intr,
BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_intr,
BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_intr,
BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_intr,

BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_x280_intr,
BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_x280_intr,
BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_x280_intr,
BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_x280_intr,

BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_asm,
BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_asm,
BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_asm,
BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_asm,
BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_intr,
BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_intr,
BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_intr,
BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_intr,

BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_x280_intr,
BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_x280_intr,
Expand All @@ -130,46 +130,46 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_x280_intr,
BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_sifive_x280_intr,

BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_asm,
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_asm,
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_asm,
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_asm,
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_intr,
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_intr,
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_intr,
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_intr,

BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_asm,
BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_asm,
BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_asm,
BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_asm,
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_intr,
BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_intr,
BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_intr,
BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_intr,

BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_x280_intr,
BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_x280_intr,
BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_x280_intr,
BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_x280_intr,

BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_asm,
BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_asm,
BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_asm,
BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_asm,
BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_intr,
BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_intr,
BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_intr,
BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_intr,

// Level 1m
BLIS_PACKM_KER, BLIS_FLOAT, bli_spackm_sifive_x280_asm_7m4,
BLIS_PACKM_KER, BLIS_DOUBLE, bli_dpackm_sifive_x280_asm_7m4,
BLIS_PACKM_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_x280_asm_6m2,
BLIS_PACKM_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_x280_asm_6m2,
BLIS_PACKM_KER, BLIS_FLOAT, bli_spackm_sifive_x280_intr,
BLIS_PACKM_KER, BLIS_DOUBLE, bli_dpackm_sifive_x280_intr,
BLIS_PACKM_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_x280_intr,
BLIS_PACKM_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_x280_intr,

// Level 3
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_asm_7m4,
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_asm_7m4,
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_asm_6m2,
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_asm_6m2,

BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_asm,
BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_asm,
BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_asm,
BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_asm,
BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_asm,
BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_asm,
BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_asm,
BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_asm,
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_intr,
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_intr,
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_intr,
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_intr,

BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_intr,
BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_intr,
BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_intr,
BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_intr,
BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_intr,
BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_intr,
BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_intr,
BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_intr,

BLIS_VA_END
);
Expand Down
2 changes: 1 addition & 1 deletion config/sifive_x280/make_defs.mk
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,7 +61,7 @@ endif
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O0
else
COPTFLAGS := -Ofast
COPTFLAGS := -O3
endif

# Flags specific to optimized kernels.
Expand Down
Loading

0 comments on commit 50b7117

Please sign in to comment.