diff --git a/.github/cases/blastoise/default.json b/.github/cases/blastoise/default.json index 68280a391..316f7d8dc 100644 --- a/.github/cases/blastoise/default.json +++ b/.github/cases/blastoise/default.json @@ -1,4 +1,5 @@ { + "pytorch.demo": -1, "mlir.rvv_vp_intrinsic_add": 436, "mlir.rvv_vp_intrinsic_add_scalable": 584, "mlir.hello": 146, diff --git a/.github/cases/psyduck/default.json b/.github/cases/psyduck/default.json new file mode 100644 index 000000000..460e5e84f --- /dev/null +++ b/.github/cases/psyduck/default.json @@ -0,0 +1,529 @@ +{ + "asm.mmm": 0, + "asm.smoke": 0, + "codegen.vaadd_vv": 0, + "codegen.vaadd_vx": 0, + "codegen.vaaddu_vv": 0, + "codegen.vaaddu_vx": 0, + "codegen.vadc_vim": 0, + "codegen.vadc_vvm": 0, + "codegen.vadc_vxm": 0, + "codegen.vadd_vi": 0, + "codegen.vadd_vv": 0, + "codegen.vadd_vx": 0, + "codegen.vand_vi": 0, + "codegen.vand_vv": 0, + "codegen.vand_vx": 0, + "codegen.vandn_vv": 0, + "codegen.vandn_vx": 0, + "codegen.vasub_vv": 0, + "codegen.vasub_vx": 0, + "codegen.vasubu_vv": 0, + "codegen.vasubu_vx": 0, + "codegen.vbrev8_v": 0, + "codegen.vbrev_v": 0, + "codegen.vclz_v": 0, + "codegen.vcompress_vm": 0, + "codegen.vcpop_m": 0, + "codegen.vcpop_v": 0, + "codegen.vctz_v": 0, + "codegen.vdiv_vv": 0, + "codegen.vdiv_vx": 0, + "codegen.vdivu_vv": 0, + "codegen.vdivu_vx": 0, + "codegen.vfadd_vf": 0, + "codegen.vfadd_vv": 0, + "codegen.vfclass_v": 0, + "codegen.vfcvt_f_x_v": 0, + "codegen.vfcvt_f_xu_v": 0, + "codegen.vfcvt_rtz_x_f_v": 0, + "codegen.vfcvt_rtz_xu_f_v": 0, + "codegen.vfcvt_x_f_v": 0, + "codegen.vfcvt_xu_f_v": 0, + "codegen.vfdiv_vf": 0, + "codegen.vfdiv_vv": 0, + "codegen.vfirst_m": 0, + "codegen.vfmacc_vf": 0, + "codegen.vfmacc_vv": 0, + "codegen.vfmadd_vf": 0, + "codegen.vfmadd_vv": 0, + "codegen.vfmax_vf": 0, + "codegen.vfmax_vv": 0, + "codegen.vfmerge_vfm": 0, + "codegen.vfmin_vf": 0, + "codegen.vfmin_vv": 0, + "codegen.vfmsac_vf": 0, + "codegen.vfmsac_vv": 0, + "codegen.vfmsub_vf": 0, + "codegen.vfmsub_vv": 0, + "codegen.vfmul_vf": 0, + "codegen.vfmul_vv": 0, + "codegen.vfmv_f_s": 0, + "codegen.vfmv_s_f": 0, + "codegen.vfmv_v_f": 0, + "codegen.vfnmacc_vf": 0, + "codegen.vfnmacc_vv": 0, + "codegen.vfnmadd_vf": 0, + "codegen.vfnmadd_vv": 0, + "codegen.vfnmsac_vf": 0, + "codegen.vfnmsac_vv": 0, + "codegen.vfnmsub_vf": 0, + "codegen.vfnmsub_vv": 0, + "codegen.vfrdiv_vf": 0, + "codegen.vfrec7_v": 0, + "codegen.vfredmax_vs": 0, + "codegen.vfredmin_vs": 0, + "codegen.vfredosum_vs": 0, + "codegen.vfredusum_vs": 0, + "codegen.vfrsqrt7_v": 0, + "codegen.vfrsub_vf": 0, + "codegen.vfsgnj_vf": 0, + "codegen.vfsgnj_vv": 0, + "codegen.vfsgnjn_vf": 0, + "codegen.vfsgnjn_vv": 0, + "codegen.vfsgnjx_vf": 0, + "codegen.vfsgnjx_vv": 0, + "codegen.vfsqrt_v": 0, + "codegen.vfsub_vf": 0, + "codegen.vfsub_vv": 0, + "codegen.vid_v": 0, + "codegen.viota_m": 0, + "codegen.vl1re16_v": 0, + "codegen.vl1re32_v": 0, + "codegen.vl1re8_v": 0, + "codegen.vl2re16_v": 0, + "codegen.vl2re32_v": 0, + "codegen.vl2re8_v": 0, + "codegen.vl4re16_v": 0, + "codegen.vl4re32_v": 0, + "codegen.vl4re8_v": 0, + "codegen.vl8re16_v": 0, + "codegen.vl8re32_v": 0, + "codegen.vl8re8_v": 0, + "codegen.vle16_v": 0, + "codegen.vle16ff_v": 0, + "codegen.vle32_v": 0, + "codegen.vle32ff_v": 0, + "codegen.vle8_v": 0, + "codegen.vle8ff_v": 0, + "codegen.vlm_v": 0, + "codegen.vloxei16_v": 0, + "codegen.vloxei32_v": 0, + "codegen.vloxei8_v": 0, + "codegen.vloxseg2ei16_v": 0, + "codegen.vloxseg2ei32_v": 0, + "codegen.vloxseg2ei8_v": 0, + "codegen.vloxseg3ei16_v": 0, + "codegen.vloxseg3ei32_v": 0, + 
"codegen.vloxseg3ei8_v": 0, + "codegen.vloxseg4ei16_v": 0, + "codegen.vloxseg4ei32_v": 0, + "codegen.vloxseg4ei8_v": 0, + "codegen.vloxseg5ei16_v": 0, + "codegen.vloxseg5ei32_v": 0, + "codegen.vloxseg5ei8_v": 0, + "codegen.vloxseg6ei16_v": 0, + "codegen.vloxseg6ei32_v": 0, + "codegen.vloxseg6ei8_v": 0, + "codegen.vloxseg7ei16_v": 0, + "codegen.vloxseg7ei32_v": 0, + "codegen.vloxseg7ei8_v": 0, + "codegen.vloxseg8ei16_v": 0, + "codegen.vloxseg8ei32_v": 0, + "codegen.vloxseg8ei8_v": 0, + "codegen.vlse16_v": 0, + "codegen.vlse32_v": 0, + "codegen.vlse8_v": 0, + "codegen.vlseg2e16_v": 0, + "codegen.vlseg2e32_v": 0, + "codegen.vlseg2e8_v": 0, + "codegen.vlseg3e16_v": 0, + "codegen.vlseg3e32_v": 0, + "codegen.vlseg3e8_v": 0, + "codegen.vlseg4e16_v": 0, + "codegen.vlseg4e32_v": 0, + "codegen.vlseg4e8_v": 0, + "codegen.vlseg5e16_v": 0, + "codegen.vlseg5e32_v": 0, + "codegen.vlseg5e8_v": 0, + "codegen.vlseg6e16_v": 0, + "codegen.vlseg6e32_v": 0, + "codegen.vlseg6e8_v": 0, + "codegen.vlseg7e16_v": 0, + "codegen.vlseg7e32_v": 0, + "codegen.vlseg7e8_v": 0, + "codegen.vlseg8e16_v": 0, + "codegen.vlseg8e32_v": 0, + "codegen.vlseg8e8_v": 0, + "codegen.vlsseg2e16_v": 0, + "codegen.vlsseg2e32_v": 0, + "codegen.vlsseg2e8_v": 0, + "codegen.vlsseg3e16_v": 0, + "codegen.vlsseg3e32_v": 0, + "codegen.vlsseg3e8_v": 0, + "codegen.vlsseg4e16_v": 0, + "codegen.vlsseg4e32_v": 0, + "codegen.vlsseg4e8_v": 0, + "codegen.vlsseg5e16_v": 0, + "codegen.vlsseg5e32_v": 0, + "codegen.vlsseg5e8_v": 0, + "codegen.vlsseg6e16_v": 0, + "codegen.vlsseg6e32_v": 0, + "codegen.vlsseg6e8_v": 0, + "codegen.vlsseg7e16_v": 0, + "codegen.vlsseg7e32_v": 0, + "codegen.vlsseg7e8_v": 0, + "codegen.vlsseg8e16_v": 0, + "codegen.vlsseg8e32_v": 0, + "codegen.vlsseg8e8_v": 0, + "codegen.vluxei16_v": 0, + "codegen.vluxei32_v": 0, + "codegen.vluxei8_v": 0, + "codegen.vluxseg2ei16_v": 0, + "codegen.vluxseg2ei32_v": 0, + "codegen.vluxseg2ei8_v": 0, + "codegen.vluxseg3ei16_v": 0, + "codegen.vluxseg3ei32_v": 0, + "codegen.vluxseg3ei8_v": 0, + "codegen.vluxseg4ei16_v": 0, + "codegen.vluxseg4ei32_v": 0, + "codegen.vluxseg4ei8_v": 0, + "codegen.vluxseg5ei16_v": 0, + "codegen.vluxseg5ei32_v": 0, + "codegen.vluxseg5ei8_v": 0, + "codegen.vluxseg6ei16_v": 0, + "codegen.vluxseg6ei32_v": 0, + "codegen.vluxseg6ei8_v": 0, + "codegen.vluxseg7ei16_v": 0, + "codegen.vluxseg7ei32_v": 0, + "codegen.vluxseg7ei8_v": 0, + "codegen.vluxseg8ei16_v": 0, + "codegen.vluxseg8ei32_v": 0, + "codegen.vluxseg8ei8_v": 0, + "codegen.vmacc_vv": 0, + "codegen.vmacc_vx": 0, + "codegen.vmadc_vi": 0, + "codegen.vmadc_vim": 0, + "codegen.vmadc_vv": 0, + "codegen.vmadc_vvm": 0, + "codegen.vmadc_vx": 0, + "codegen.vmadc_vxm": 0, + "codegen.vmadd_vv": 0, + "codegen.vmadd_vx": 0, + "codegen.vmand_mm": 0, + "codegen.vmandn_mm": 0, + "codegen.vmax_vv": 0, + "codegen.vmax_vx": 0, + "codegen.vmaxu_vv": 0, + "codegen.vmaxu_vx": 0, + "codegen.vmerge_vim": 0, + "codegen.vmerge_vvm": 0, + "codegen.vmerge_vxm": 0, + "codegen.vmfeq_vf": 0, + "codegen.vmfeq_vv": 0, + "codegen.vmfge_vf": 0, + "codegen.vmfgt_vf": 0, + "codegen.vmflt_vf": 0, + "codegen.vmflt_vv": 0, + "codegen.vmfne_vf": 0, + "codegen.vmfne_vv": 0, + "codegen.vmin_vv": 0, + "codegen.vmin_vx": 0, + "codegen.vminu_vv": 0, + "codegen.vminu_vx": 0, + "codegen.vmnand_mm": 0, + "codegen.vmnor_mm": 0, + "codegen.vmor_mm": 0, + "codegen.vmorn_mm": 0, + "codegen.vmsbc_vv": 0, + "codegen.vmsbc_vvm": 0, + "codegen.vmsbc_vx": 0, + "codegen.vmsbc_vxm": 0, + "codegen.vmsbf_m": 0, + "codegen.vmseq_vi": 0, + "codegen.vmseq_vv": 0, + "codegen.vmseq_vx": 0, + 
"codegen.vmsgt_vi": 0, + "codegen.vmsgt_vv": 0, + "codegen.vmsgt_vx": 0, + "codegen.vmsgtu_vi": 0, + "codegen.vmsgtu_vv": 0, + "codegen.vmsgtu_vx": 0, + "codegen.vmsif_m": 0, + "codegen.vmsle_vi": 0, + "codegen.vmsle_vv": 0, + "codegen.vmsle_vx": 0, + "codegen.vmsleu_vi": 0, + "codegen.vmsleu_vv": 0, + "codegen.vmsleu_vx": 0, + "codegen.vmslt_vv": 0, + "codegen.vmslt_vx": 0, + "codegen.vmsltu_vv": 0, + "codegen.vmsltu_vx": 0, + "codegen.vmsne_vi": 0, + "codegen.vmsne_vv": 0, + "codegen.vmsne_vx": 0, + "codegen.vmsof_m": 0, + "codegen.vmul_vv": 0, + "codegen.vmul_vx": 0, + "codegen.vmulh_vv": 0, + "codegen.vmulh_vx": 0, + "codegen.vmulhsu_vv": 0, + "codegen.vmulhsu_vx": 0, + "codegen.vmulhu_vv": 0, + "codegen.vmulhu_vx": 0, + "codegen.vmv1r_v": 0, + "codegen.vmv2r_v": 0, + "codegen.vmv4r_v": 0, + "codegen.vmv8r_v": 0, + "codegen.vmv_s_x": 0, + "codegen.vmv_v_i": 0, + "codegen.vmv_v_v": 0, + "codegen.vmv_v_x": 0, + "codegen.vmv_x_s": 0, + "codegen.vmxnor_mm": 0, + "codegen.vmxor_mm": 0, + "codegen.vnclip_wi": 0, + "codegen.vnclip_wv": 0, + "codegen.vnclip_wx": 0, + "codegen.vnclipu_wi": 0, + "codegen.vnclipu_wv": 0, + "codegen.vnclipu_wx": 0, + "codegen.vnmsac_vv": 0, + "codegen.vnmsac_vx": 0, + "codegen.vnmsub_vv": 0, + "codegen.vnmsub_vx": 0, + "codegen.vnsra_wi": 0, + "codegen.vnsra_wv": 0, + "codegen.vnsra_wx": 0, + "codegen.vnsrl_wi": 0, + "codegen.vnsrl_wv": 0, + "codegen.vnsrl_wx": 0, + "codegen.vor_vi": 0, + "codegen.vor_vv": 0, + "codegen.vor_vx": 0, + "codegen.vredand_vs": 0, + "codegen.vredmax_vs": 0, + "codegen.vredmaxu_vs": 0, + "codegen.vredmin_vs": 0, + "codegen.vredminu_vs": 0, + "codegen.vredor_vs": 0, + "codegen.vredsum_vs": 0, + "codegen.vredxor_vs": 0, + "codegen.vrem_vv": 0, + "codegen.vrem_vx": 0, + "codegen.vremu_vv": 0, + "codegen.vremu_vx": 0, + "codegen.vrev8_v": 0, + "codegen.vrgather_vi": 0, + "codegen.vrgather_vv": 0, + "codegen.vrgather_vx": 0, + "codegen.vrgatherei16_vv": 0, + "codegen.vrol_vv": 0, + "codegen.vrol_vx": 0, + "codegen.vror_vi": 0, + "codegen.vror_vv": 0, + "codegen.vror_vx": 0, + "codegen.vrsub_vi": 0, + "codegen.vrsub_vx": 0, + "codegen.vs1r_v": 0, + "codegen.vs2r_v": 0, + "codegen.vs4r_v": 0, + "codegen.vs8r_v": 0, + "codegen.vsadd_vi": 0, + "codegen.vsadd_vv": 0, + "codegen.vsadd_vx": 0, + "codegen.vsaddu_vi": 0, + "codegen.vsaddu_vv": 0, + "codegen.vsaddu_vx": 0, + "codegen.vsbc_vvm": 0, + "codegen.vsbc_vxm": 0, + "codegen.vse16_v": 0, + "codegen.vse32_v": 0, + "codegen.vse8_v": 0, + "codegen.vsetivli": 0, + "codegen.vsetvl": 0, + "codegen.vsetvli": 0, + "codegen.vsext_vf2": 0, + "codegen.vsext_vf4": 0, + "codegen.vslide1down_vx": 0, + "codegen.vslide1up_vx": 0, + "codegen.vslidedown_vi": 0, + "codegen.vslidedown_vx": 0, + "codegen.vslideup_vi": 0, + "codegen.vslideup_vx": 0, + "codegen.vsll_vi": 0, + "codegen.vsll_vv": 0, + "codegen.vsll_vx": 0, + "codegen.vsm_v": 0, + "codegen.vsmul_vv": 0, + "codegen.vsmul_vx": 0, + "codegen.vsoxei16_v": 0, + "codegen.vsoxei32_v": 0, + "codegen.vsoxei8_v": 0, + "codegen.vsoxseg2ei16_v": 0, + "codegen.vsoxseg2ei32_v": 0, + "codegen.vsoxseg2ei8_v": 0, + "codegen.vsoxseg3ei16_v": 0, + "codegen.vsoxseg3ei32_v": 0, + "codegen.vsoxseg3ei8_v": 0, + "codegen.vsoxseg4ei16_v": 0, + "codegen.vsoxseg4ei32_v": 0, + "codegen.vsoxseg4ei8_v": 0, + "codegen.vsoxseg5ei16_v": 0, + "codegen.vsoxseg5ei32_v": 0, + "codegen.vsoxseg5ei8_v": 0, + "codegen.vsoxseg6ei16_v": 0, + "codegen.vsoxseg6ei32_v": 0, + "codegen.vsoxseg6ei8_v": 0, + "codegen.vsoxseg7ei16_v": 0, + "codegen.vsoxseg7ei32_v": 0, + "codegen.vsoxseg7ei8_v": 0, + 
"codegen.vsoxseg8ei16_v": 0, + "codegen.vsoxseg8ei32_v": 0, + "codegen.vsoxseg8ei8_v": 0, + "codegen.vsra_vi": 0, + "codegen.vsra_vv": 0, + "codegen.vsra_vx": 0, + "codegen.vsrl_vi": 0, + "codegen.vsrl_vv": 0, + "codegen.vsrl_vx": 0, + "codegen.vsse16_v": 0, + "codegen.vsse32_v": 0, + "codegen.vsse8_v": 0, + "codegen.vsseg2e16_v": 0, + "codegen.vsseg2e32_v": 0, + "codegen.vsseg2e8_v": 0, + "codegen.vsseg3e16_v": 0, + "codegen.vsseg3e32_v": 0, + "codegen.vsseg3e8_v": 0, + "codegen.vsseg4e16_v": 0, + "codegen.vsseg4e32_v": 0, + "codegen.vsseg4e8_v": 0, + "codegen.vsseg5e16_v": 0, + "codegen.vsseg5e32_v": 0, + "codegen.vsseg5e8_v": 0, + "codegen.vsseg6e16_v": 0, + "codegen.vsseg6e32_v": 0, + "codegen.vsseg6e8_v": 0, + "codegen.vsseg7e16_v": 0, + "codegen.vsseg7e32_v": 0, + "codegen.vsseg7e8_v": 0, + "codegen.vsseg8e16_v": 0, + "codegen.vsseg8e32_v": 0, + "codegen.vsseg8e8_v": 0, + "codegen.vssra_vi": 0, + "codegen.vssra_vv": 0, + "codegen.vssra_vx": 0, + "codegen.vssrl_vi": 0, + "codegen.vssrl_vv": 0, + "codegen.vssrl_vx": 0, + "codegen.vssseg2e16_v": 0, + "codegen.vssseg2e32_v": 0, + "codegen.vssseg2e8_v": 0, + "codegen.vssseg3e16_v": 0, + "codegen.vssseg3e32_v": 0, + "codegen.vssseg3e8_v": 0, + "codegen.vssseg4e16_v": 0, + "codegen.vssseg4e32_v": 0, + "codegen.vssseg4e8_v": 0, + "codegen.vssseg5e16_v": 0, + "codegen.vssseg5e32_v": 0, + "codegen.vssseg5e8_v": 0, + "codegen.vssseg6e16_v": 0, + "codegen.vssseg6e32_v": 0, + "codegen.vssseg6e8_v": 0, + "codegen.vssseg7e16_v": 0, + "codegen.vssseg7e32_v": 0, + "codegen.vssseg7e8_v": 0, + "codegen.vssseg8e16_v": 0, + "codegen.vssseg8e32_v": 0, + "codegen.vssseg8e8_v": 0, + "codegen.vssub_vv": 0, + "codegen.vssub_vx": 0, + "codegen.vssubu_vv": 0, + "codegen.vssubu_vx": 0, + "codegen.vsub_vv": 0, + "codegen.vsub_vx": 0, + "codegen.vsuxei16_v": 0, + "codegen.vsuxei32_v": 0, + "codegen.vsuxei8_v": 0, + "codegen.vsuxseg2ei16_v": 0, + "codegen.vsuxseg2ei32_v": 0, + "codegen.vsuxseg2ei8_v": 0, + "codegen.vsuxseg3ei16_v": 0, + "codegen.vsuxseg3ei32_v": 0, + "codegen.vsuxseg3ei8_v": 0, + "codegen.vsuxseg4ei16_v": 0, + "codegen.vsuxseg4ei32_v": 0, + "codegen.vsuxseg4ei8_v": 0, + "codegen.vsuxseg5ei16_v": 0, + "codegen.vsuxseg5ei32_v": 0, + "codegen.vsuxseg5ei8_v": 0, + "codegen.vsuxseg6ei16_v": 0, + "codegen.vsuxseg6ei32_v": 0, + "codegen.vsuxseg6ei8_v": 0, + "codegen.vsuxseg7ei16_v": 0, + "codegen.vsuxseg7ei32_v": 0, + "codegen.vsuxseg7ei8_v": 0, + "codegen.vsuxseg8ei16_v": 0, + "codegen.vsuxseg8ei32_v": 0, + "codegen.vsuxseg8ei8_v": 0, + "codegen.vwadd_vv": 0, + "codegen.vwadd_vx": 0, + "codegen.vwadd_wv": 0, + "codegen.vwadd_wx": 0, + "codegen.vwaddu_vv": 0, + "codegen.vwaddu_vx": 0, + "codegen.vwaddu_wv": 0, + "codegen.vwaddu_wx": 0, + "codegen.vwmacc_vv": 0, + "codegen.vwmacc_vx": 0, + "codegen.vwmaccsu_vv": 0, + "codegen.vwmaccsu_vx": 0, + "codegen.vwmaccu_vv": 0, + "codegen.vwmaccu_vx": 0, + "codegen.vwmaccus_vx": 0, + "codegen.vwmul_vv": 0, + "codegen.vwmul_vx": 0, + "codegen.vwmulsu_vv": 0, + "codegen.vwmulsu_vx": 0, + "codegen.vwmulu_vv": 0, + "codegen.vwmulu_vx": 0, + "codegen.vwredsum_vs": 0, + "codegen.vwredsumu_vs": 0, + "codegen.vwsll_vi": 0, + "codegen.vwsll_vv": 0, + "codegen.vwsll_vx": 0, + "codegen.vwsub_vv": 0, + "codegen.vwsub_vx": 0, + "codegen.vwsub_wv": 0, + "codegen.vwsub_wx": 0, + "codegen.vwsubu_vv": 0, + "codegen.vwsubu_vx": 0, + "codegen.vwsubu_wv": 0, + "codegen.vwsubu_wx": 0, + "codegen.vxor_vi": 0, + "codegen.vxor_vv": 0, + "codegen.vxor_vx": 0, + "codegen.vzext_vf2": 0, + "codegen.vzext_vf4": 0, + "intrinsic.conv2d_less_m2": 
0, + "intrinsic.linear_normalization": 0, + "intrinsic.softmax": 0, + "mlir.hello": 0, + "mlir.rvv_vp_intrinsic_add": 0, + "mlir.rvv_vp_intrinsic_add_scalable": 0, + "mlir.stripmining": 0, + "rvv_bench.ascii_to_utf16": 0, + "rvv_bench.ascii_to_utf32": 0, + "rvv_bench.byteswap": 0, + "rvv_bench.chacha20": 0, + "rvv_bench.mandelbrot": 0, + "rvv_bench.memcpy": 0, + "rvv_bench.memset": 0, + "rvv_bench.mergelines": 0, + "rvv_bench.poly1305": 0, + "rvv_bench.strlen": 0, + "rvv_bench.utf8_count": 0 +} diff --git a/.github/cases/psyduck/perf.json b/.github/cases/psyduck/perf.json new file mode 100644 index 000000000..3527224f9 --- /dev/null +++ b/.github/cases/psyduck/perf.json @@ -0,0 +1,9 @@ +{ + "mlir.conv": 0, + "mlir.matmul": 0, + "mlir.stripmining": 0, + "intrinsic.conv2d_less_m2": 0, + "intrinsic.linear_normalization": 0, + "intrinsic.matmul": 0, + "intrinsic.softmax": 0 +} diff --git a/README.md b/README.md index 78e5c6bb9..43658082b 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,43 @@ If using clion, $ nix develop .#t1..ip.emu -c clion ipemu/csrc ``` +#### Rocket emulator + +Rocket emulator contains multiple build phrase: RTL -> MLIR Bytecode -> +system verilog -> verilated C sources -> Rust emulator. + +Most of the developer doesn't need to care about MLIR, system verilog and verilate detail. +To develop the Rocket-chip RTL, run: + +```bash +# This command provide a environment that contains mill, circt, espresso... development tools. +nix develop '.#t1.elaborator' +``` + +> Metals LSP users are recommended to switch to mill-bsp mode instead of the default bloop mode. + +To elaborate the RTLs, run mill or use the nix chroot: + +```bash +# for development +mill -i elaborator.runMain org.chipsalliance.t1.elaborator.Main +# for clean build +nix build .#t1.rocketv-mlirbc +``` + +To develop the emulator, use the below nix environment: + +```bash +nix develop .#t1.rocketv-emu.driver.devShell +``` + +This will setup the verilated C src in environment, download rust-analyzer. + +```bash +cd rocketemu/driver +cargo build --release +``` + #### Developing Testcases The `tests/` contains the testcases. 
There are four types of testcases: diff --git a/build.sc b/build.sc index 8e2cc5c3d..17f308090 100644 --- a/build.sc +++ b/build.sc @@ -35,7 +35,7 @@ trait Chisel object arithmetic extends Arithmetic -trait Arithmetic +trait Arithmetic extends millbuild.dependencies.arithmetic.common.ArithmeticModule { override def millSourcePath = os.pwd / "dependencies" / "arithmetic" / "arithmetic" def scalaVersion = T(v.scala) @@ -118,6 +118,38 @@ trait ConfigGen def mainargsIvy = v.mainargs } +object rocketv extends RocketV + +trait RocketV + extends millbuild.common.RocketVModule + with ScalafmtModule { + def scalaVersion = T(v.scala) + def rvdecoderdbModule = rvdecoderdb + def riscvOpcodesPath = T.input(PathRef(os.pwd / "dependencies" / "riscv-opcodes")) + def hardfloatModule = hardfloat + def axi4Module = axi4 + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) + def chiselPluginIvy = None + def chiselIvy = None +} + +object t1rocket extends T1Rocket + +trait T1Rocket + extends millbuild.common.T1RocketModule + with ScalafmtModule { + def scalaVersion = T(v.scala) + def rocketModule = rocketv + def t1Module = t1 + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) + def chiselPluginIvy = None + def chiselIvy = None +} + object ipemu extends IPEmulator trait IPEmulator @@ -132,6 +164,32 @@ trait IPEmulator def chiselIvy = None } +object rocketemu extends RocketEmulator +trait RocketEmulator extends millbuild.common.RocketEmulatorModule { + def scalaVersion = T(v.scala) + + def rocketVModule = rocketv + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) + def chiselPluginIvy = None + def chiselIvy = None +} + +object t1rocketemu extends T1RocketEmulator + +trait T1RocketEmulator + extends millbuild.common.T1RocketEmulatorModule { + def scalaVersion = T(v.scala) + + def t1rocketModule = t1rocket + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) + def chiselPluginIvy = None + def chiselIvy = None +} + object panamaconverter extends PanamaConverter trait PanamaConverter @@ -157,6 +215,10 @@ trait Elaborator def generators = Seq( t1, ipemu, + rocketv, + rocketemu, + t1rocket, + t1rocketemu, ) def mainargsIvy = v.mainargs diff --git a/common.sc b/common.sc index 21552589d..0f39a2376 100644 --- a/common.sc +++ b/common.sc @@ -70,6 +70,40 @@ trait ConfigGenModule override def ivyDeps = T(super.ivyDeps() ++ Seq(mainargsIvy)) } +// T1 forked version of RocketCore +trait RocketModule + extends ScalaModule + with HasChisel + with HasRVDecoderDB { + def rocketchipModule: ScalaModule + def moduleDeps = super.moduleDeps ++ Seq(rocketchipModule) +} + +// The next generation of purely standalone Rocket Core w/ AXI/CHI. 
+trait RocketVModule + extends ScalaModule + with HasChisel + with HasRVDecoderDB { + def axi4Module: ScalaModule + def hardfloatModule: ScalaModule + + def moduleDeps = super.moduleDeps ++ Seq(axi4Module, hardfloatModule) +} + +// Link T1 example: RocketV+T1 +trait T1RocketModule + extends ScalaModule + with HasChisel { + def rocketModule: ScalaModule + def t1Module: ScalaModule + + def moduleDeps = super.moduleDeps ++ Seq(rocketModule, t1Module) +} + +trait EmuHelperModule + extends ScalaModule + with HasChisel + trait IPEmulatorModule extends ScalaModule with HasChisel { @@ -77,6 +111,13 @@ trait IPEmulatorModule def moduleDeps = super.moduleDeps ++ Seq(t1Module) } +trait T1RocketEmulatorModule + extends ScalaModule + with HasChisel { + def t1rocketModule: ScalaModule + def moduleDeps = super.moduleDeps ++ Seq(t1rocketModule) +} + trait ElaboratorModule extends ScalaModule with HasChisel { @@ -120,3 +161,10 @@ trait OMReaderModule super.forkArgs() ++ Seq("--enable-native-access=ALL-UNNAMED", "--enable-preview", s"-Djava.library.path=${ circtInstallPath().path / "lib"}") ) } + +trait RocketEmulatorModule + extends ScalaModule + with HasChisel { + def rocketVModule: ScalaModule + def moduleDeps = super.moduleDeps ++ Seq(rocketVModule) +} diff --git a/configgen/generated/blastoise.json b/configgen/generated/blastoise.json index d0e26eb82..290ef86c1 100644 --- a/configgen/generated/blastoise.json +++ b/configgen/generated/blastoise.json @@ -6,86 +6,6 @@ "Zve32f" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01???????????????????????00?????\nb10???????????????????????00?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01???????????????????????01?????\nb10???????????????????????01?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01???????????????????????10?????\nb10???????????????????????10?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01???????????????????????11?????\nb10???????????????????????11?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b11000000000?????????????000?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b11000000000?????????????001?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b11000000000?????????????010?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b11000000000?????????????011?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b11000000000?????????????100?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b11000000000?????????????101?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b11000000000?????????????110?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b11000000000?????????????111?????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 1, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rwp1rw", "vfuInstantiateParameter": { @@ -246,7 +166,8 @@ 3 ] ] - ] + ], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/generated/machamp.json 
b/configgen/generated/machamp.json index dc0a4b2d9..ceeaf5e59 100644 --- a/configgen/generated/machamp.json +++ b/configgen/generated/machamp.json @@ -6,86 +6,6 @@ "Zve32x" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01??????????????????????00??????\nb10??????????????????????00??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01??????????????????????01??????\nb10??????????????????????01??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01??????????????????????10??????\nb10??????????????????????10??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01??????????????????????11??????\nb10??????????????????????11??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b11000000000????????????000??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b11000000000????????????001??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b11000000000????????????010??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b11000000000????????????011??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b11000000000????????????100??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b11000000000????????????101??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b11000000000????????????110??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b11000000000????????????111??????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 2, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rp1w", "vfuInstantiateParameter": { @@ -230,7 +150,8 @@ ] ] ], - "floatModuleParameters": [] + "floatModuleParameters": [], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/generated/psyduck.json b/configgen/generated/psyduck.json new file mode 100644 index 000000000..04a2f3572 --- /dev/null +++ b/configgen/generated/psyduck.json @@ -0,0 +1,191 @@ +{ + "parameter": { + "vLen": 512, + "dLen": 256, + "extensions": [ + "Zve32f", + "Zvbb" + ], + "t1customInstructions": [], + "vrfBankSize": 1, + "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rwp1rw", + "vfuInstantiateParameter": { + "slotCount": 4, + "logicModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.MaskedLogic" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "aluModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 0 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 1 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 2 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 3 + ] + ] + ], + "shifterModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + 
"latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneShifter" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "mulModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 2 + }, + "generator": "org.chipsalliance.t1.rtl.LaneMul" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "divModuleParameters": [], + "divfpModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneDivFP" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "otherModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "vlMaxBits": 10, + "groupNumberBits": 4, + "laneNumberBits": 3, + "dataPathByteWidth": 4, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.OtherUnit" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "floatModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneFloat" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "zvbbModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneZvbb" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ] + } + }, + "generator": "org.chipsalliance.t1.rtl.T1" +} \ No newline at end of file diff --git a/configgen/generated/sandslash.json b/configgen/generated/sandslash.json index 5ae0cb6b3..688085fe1 100644 --- a/configgen/generated/sandslash.json +++ b/configgen/generated/sandslash.json @@ -6,134 +6,6 @@ "Zve32x" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01?????????????????????00???????\nb10?????????????????????00???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01?????????????????????01???????\nb10?????????????????????01???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01?????????????????????10???????\nb10?????????????????????10???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01?????????????????????11???????\nb10?????????????????????11???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b1100000000?????????0000?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b1100000000?????????0001?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b1100000000?????????0010?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b1100000000?????????0011?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b1100000000?????????0100?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b1100000000?????????0101?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b1100000000?????????0110?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b1100000000?????????0111?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank8", - "region": "b1100000000?????????1000?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank9", - "region": "b1100000000?????????1001?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank10", - "region": 
"b1100000000?????????1010?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank11", - "region": "b1100000000?????????1011?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank12", - "region": "b1100000000?????????1100?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank13", - "region": "b1100000000?????????1101?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank14", - "region": "b1100000000?????????1110?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank15", - "region": "b1100000000?????????1111?????????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 4, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rw", "vfuInstantiateParameter": { @@ -278,7 +150,8 @@ ] ] ], - "floatModuleParameters": [] + "floatModuleParameters": [], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/src/Main.scala b/configgen/src/Main.scala index c48760fef..88e3bc326 100644 --- a/configgen/src/Main.scala +++ b/configgen/src/Main.scala @@ -99,7 +99,59 @@ object Main { ), Seq(0, 1, 2, 3))), floatModuleParameters = - Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))) + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = Seq() + ) + ) + if (doEmit) param.emit(targetFile) + param + } + + // DLEN256 VLEN256; FP; VRF p0rw,p1rw bank1; LSU bank8 beatbyte 8; Zvbb + @main def psyduck( + @arg(name = "target-file", short = 't') targetFile: os.Path, + @arg(name = "emit", short = 'e', doc = "emit config") doEmit: Boolean = true + ): T1Parameter = { + val vLen = 512 + val dLen = 256 + val param = T1Parameter( + vLen, + dLen, + extensions = Seq("Zve32f", "Zvbb"), + t1customInstructions = Nil, + vrfBankSize = 1, + vrfRamType = RamType.p0rwp1rw, + vfuInstantiateParameter = VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq(), + divfpModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3))) ) ) if (doEmit) param.emit(targetFile) @@ -148,7 +200,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), 
Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) @@ -197,7 +250,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) diff --git a/difftest/online_dpi/src/dpi.rs b/difftest/online_dpi/src/dpi.rs index 9becfc1e0..464e007a9 100644 --- a/difftest/online_dpi/src/dpi.rs +++ b/difftest/online_dpi/src/dpi.rs @@ -2,7 +2,7 @@ #![allow(unused_variables)] use clap::Parser; -use std::ffi::{c_char, c_longlong, CString}; +use std::ffi::{c_char, c_longlong}; use std::sync::Mutex; use tracing::debug; @@ -47,7 +47,7 @@ unsafe fn load_from_payload<'a>( let data = &byte_vec[strb_width_in_byte..]; let strb_width_in_bit = std::cmp::min(8, data_width_in_byte); - let mut masks: Vec<bool> = strobe + let masks: Vec<bool> = strobe .into_iter() .flat_map(|strb| { let mask: Vec<bool> = (0..strb_width_in_bit).map(|i| (strb & (1 << i)) != 0).collect(); @@ -271,10 +271,10 @@ unsafe extern "C" fn retire_vector_mem(dummy: *const SvBitVecVal) { // import functions and wrappers //-------------------------------- +#[cfg(feature = "trace")] mod dpi_export { use std::ffi::c_char; extern "C" { - #[cfg(feature = "trace")] /// `export "DPI-C" function dump_wave(input string file)` pub fn dump_wave(path: *const c_char); } @@ -283,6 +283,7 @@ mod dpi_export { #[cfg(feature = "trace")] pub(crate) fn dump_wave(scope: crate::svdpi::SvScope, path: &str) { use crate::svdpi; + use std::ffi::CString; let path_cstring = CString::new(path).unwrap(); svdpi::set_scope(scope); diff --git a/difftest/online_dpi/src/drive.rs b/difftest/online_dpi/src/drive.rs index 5615b157b..5cc9fb78b 100644 --- a/difftest/online_dpi/src/drive.rs +++ b/difftest/online_dpi/src/drive.rs @@ -99,6 +99,7 @@ pub(crate) struct Driver { spike_runner: SpikeRunner, // SvScope from t1_cosim_init + #[cfg(feature = "trace")] scope: SvScope, #[cfg(feature = "trace")] @@ -161,8 +162,9 @@ impl Driver { let mut self_ = Self { spike_runner: SpikeRunner::new(&args.common_args, false), - scope, + + #[cfg(feature = "trace")] + scope, #[cfg(feature = "trace")] wave_path: args.wave_path.to_owned(), #[cfg(feature = "trace")] diff --git a/elaborator/src/Elaborator.scala b/elaborator/src/Elaborator.scala new file mode 100644 index 000000000..f4e080865 --- /dev/null +++ b/elaborator/src/Elaborator.scala @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator + +import chisel3.RawModule +import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter} +import mainargs.TokensReader + +import scala.reflect.runtime.universe +import scala.reflect.runtime.universe.{runtimeMirror, typeOf} + +// TODO: this will be upstreamed to Chisel +trait Elaborator { + implicit object PathRead extends TokensReader.Simple[os.Path] { + def shortName = "path" + def read(strs: Seq[String]) = Right(os.Path(strs.head, os.pwd)) + } + + def configImpl[P <: SerializableModuleParameter: universe.TypeTag]( + parameter: P + )(implicit rwP: upickle.default.Writer[P]) = os.write.over( + os.pwd / s"${getClass.getSimpleName.replace("$", "")}.json", + upickle.default.write(parameter) + ) + + def designImpl[ + M <: SerializableModule[P]: universe.TypeTag, + P <: SerializableModuleParameter: 
universe.TypeTag + ](parameter: os.Path, runFirtool: Boolean)(implicit + rwP: upickle.default.Reader[P] + ) = { + var fir: firrtl.ir.Circuit = null + val annos = Seq( + new chisel3.stage.phases.Elaborate, + new chisel3.stage.phases.Convert + ).foldLeft( + Seq( + chisel3.stage.ChiselGeneratorAnnotation(() => + SerializableModuleGenerator( + runtimeMirror(getClass.getClassLoader) + .runtimeClass(typeOf[M].typeSymbol.asClass) + .asInstanceOf[Class[M]], + upickle.default.read[P](os.read(parameter)) + ).module().asInstanceOf[RawModule] + ) + ): firrtl.AnnotationSeq + ) { case (annos, stage) => stage.transform(annos) } + .flatMap { + case firrtl.stage.FirrtlCircuitAnnotation(circuit) => + fir = circuit + None + case _: chisel3.stage.DesignAnnotation[_] => None + case _: chisel3.stage.ChiselCircuitAnnotation => None + case a => Some(a) + } + val annoJsonFile = os.pwd / s"${fir.main}.anno.json" + val firFile = os.pwd / s"${fir.main}.fir" + val svFile = os.pwd / s"${fir.main}.sv" + os.write.over(firFile, fir.serialize) + os.write.over( + annoJsonFile, + firrtl.annotations.JsonProtocol.serializeRecover(annos) + ) + if (runFirtool) { + os.proc( + "firtool", + s"--annotation-file=${annoJsonFile}", + s"${firFile}", + s"-o", + s"${svFile}", + "--strip-debug-info", + "--verification-flavor=sva", + "--extract-test-code" + ).call(os.pwd) + } + } +} diff --git a/elaborator/src/Main.scala b/elaborator/src/Main.scala index 01eea2b24..8cd5bc48f 100644 --- a/elaborator/src/Main.scala +++ b/elaborator/src/Main.scala @@ -5,7 +5,9 @@ package org.chipsalliance.t1.elaborator import mainargs._ import org.chipsalliance.t1.rtl.T1Parameter +import org.chipsalliance.rocketv.RocketTileParameter import chisel3.panamalib.option._ +import org.chipsalliance.t1.tile.T1RocketTileParameter object Main { implicit object PathRead extends TokensReader.Simple[os.Path] { @@ -27,11 +29,15 @@ object Main { ).foldLeft( Seq( chisel3.stage.ChiselGeneratorAnnotation(gen), - chisel3.panamaconverter.stage.FirtoolOptionsAnnotation(FirtoolOptions(Set( - BuildMode(BuildModeDebug), - PreserveValues(PreserveValuesModeNamed), - DisableUnknownAnnotations(true) - ))), + chisel3.panamaconverter.stage.FirtoolOptionsAnnotation( + FirtoolOptions( + Set( + BuildMode(BuildModeDebug), + PreserveValues(PreserveValuesModeNamed), + DisableUnknownAnnotations(true) + ) + ) + ) ): firrtl.AnnotationSeq ) { case (annos, stage) => stage.transform(annos) } .flatMap { @@ -41,9 +47,9 @@ object Main { case chisel3.panamaconverter.stage.PanamaCIRCTConverterAnnotation(converter) => if (binderMlirbcOut.nonEmpty) panamaCIRCTConverter = converter None - case _: chisel3.panamaconverter.stage.FirtoolOptionsAnnotation => None - case _: chisel3.stage.DesignAnnotation[_] => None - case _: chisel3.stage.ChiselCircuitAnnotation => None + case _: chisel3.panamaconverter.stage.FirtoolOptionsAnnotation => None + case _: chisel3.stage.DesignAnnotation[_] => None + case _: chisel3.stage.ChiselCircuitAnnotation => None case a => Some(a) } @@ -62,20 +68,48 @@ object Main { case class IPConfig( @arg(name = "ip-config", short = 'c') ipConfig: os.Path) { def generator = upickle.default - .read[chisel3.experimental.SerializableModuleGenerator[org.chipsalliance.t1.rtl.T1, org.chipsalliance.t1.rtl.T1Parameter]](ujson.read(os.read(ipConfig))) + .read[chisel3.experimental.SerializableModuleGenerator[ + org.chipsalliance.t1.rtl.T1, + org.chipsalliance.t1.rtl.T1Parameter + ]](ujson.read(os.read(ipConfig))) def parameter: T1Parameter = generator.parameter } - implicit def ipConfig: 
ParserForClass[IPConfig] = ParserForClass[IPConfig] + case class RocketConfig( + @arg(name = "rocket-config", short = 'c') rocketConfig: os.Path) { + def generator = upickle.default + .read[chisel3.experimental.SerializableModuleGenerator[ + org.chipsalliance.rocketv.RocketTile, + org.chipsalliance.rocketv.RocketTileParameter + ]](ujson.read(os.read(rocketConfig))) + def parameter: RocketTileParameter = generator.parameter + } + + case class T1RocketConfig( + @arg(name = "t1rocket-config", short = 'c') rocketConfig: os.Path) { + def generator = upickle.default + .read[chisel3.experimental.SerializableModuleGenerator[ + org.chipsalliance.t1.tile.T1RocketTile, + org.chipsalliance.t1.tile.T1RocketTileParameter + ]](ujson.read(os.read(rocketConfig))) + def parameter: T1RocketTileParameter = generator.parameter + } + + implicit def ipConfig: ParserForClass[IPConfig] = ParserForClass[IPConfig] + implicit def rocketConfig: ParserForClass[RocketConfig] = ParserForClass[RocketConfig] + implicit def t1RocketConfig: ParserForClass[T1RocketConfig] = ParserForClass[T1RocketConfig] + + @main def ip(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = + elaborateConfig.elaborate(() => ipConfig.generator.module()) + + @main def ipemu(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = + elaborateConfig.elaborate(() => new org.chipsalliance.t1.ipemu.TestBench(ipConfig.generator)) + + @main def rocketemu(elaborateConfig: ElaborateConfig, rocketConfig: RocketConfig): Unit = + elaborateConfig.elaborate(() => new org.chipsalliance.t1.rocketv.TestBench(rocketConfig.generator)) - // format: off - @main def ip(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = elaborateConfig.elaborate(() => - ipConfig.generator.module() - ) - @main def ipemu(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = elaborateConfig.elaborate(() => - new org.chipsalliance.t1.ipemu.TestBench(ipConfig.generator) - ) - // format: on + @main def t1rocketemu(elaborateConfig: ElaborateConfig, t1rocketConfig: T1RocketConfig): Unit = + elaborateConfig.elaborate(() => new org.chipsalliance.t1.t1rocketemu.TestBench(t1rocketConfig.generator)) def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) } diff --git a/elaborator/src/rocketv/ALU.scala b/elaborator/src/rocketv/ALU.scala new file mode 100644 index 000000000..05ad596b5 --- /dev/null +++ b/elaborator/src/rocketv/ALU.scala @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{ALU, ALUParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object ALU extends Elaborator { + @main + case class ALUParameterMain( + @arg(name = "xLen") xLen: Int) { + def convert: ALUParameter = ALUParameter(xLen) + } + + implicit def ALUParameterMainParser: ParserForClass[ALUParameterMain] = ParserForClass[ALUParameterMain] + + @main + def config(@arg(name = "parameter") parameter: ALUParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[ALU, ALUParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/AMOALU.scala b/elaborator/src/rocketv/AMOALU.scala new file mode 100644 index 000000000..148936db5 --- /dev/null +++ b/elaborator/src/rocketv/AMOALU.scala @@ -0,0 +1,26 
@@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{AMOALU, AMOALUParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object AMOALU extends Elaborator { + @main + case class AMOALUParameterMain( + @arg(name = "operandBits") operandBits: Int) { + def convert: AMOALUParameter = AMOALUParameter(operandBits) + } + + implicit def AMOALUParameterMainParser: ParserForClass[AMOALUParameterMain] = ParserForClass[AMOALUParameterMain] + + @main + def config(@arg(name = "parameter") parameter: AMOALUParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[AMOALU, AMOALUParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/BTB.scala b/elaborator/src/rocketv/BTB.scala new file mode 100644 index 000000000..859e842e3 --- /dev/null +++ b/elaborator/src/rocketv/BTB.scala @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{BHTParameter, BTB, BTBParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object BTB extends Elaborator { + @main + case class BHTParameterMain( + @arg(name = "nEntries") nEntries: Int, + @arg(name = "counterLength") counterLength: Int, + @arg(name = "historyLength") historyLength: Int, + @arg(name = "historyBits") historyBits: Int) { + def convert: BHTParameter = BHTParameter( + nEntries, + counterLength, + historyLength, + historyBits + ) + } + implicit def BHTParameterMainParser: ParserForClass[BHTParameterMain] = ParserForClass[BHTParameterMain] + + @main + case class BTBParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "fetchBytes") fetchBytes: Int, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "entries") entries: Int, + @arg(name = "nMatchBits") nMatchBits: Int, + @arg(name = "nPages") nPages: Int, + @arg(name = "nRAS") nRAS: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "iCacheSet") iCacheSet: Int, + @arg(name = "useCompressed") useCompressed: Boolean, + @arg(name = "updatesOutOfOrder") updatesOutOfOrder: Boolean, + @arg(name = "bht-nEntries") nEntries: Option[Int], + @arg(name = "bht-counterLength") counterLength: Option[Int], + @arg(name = "bht-historyLength") historyLength: Option[Int], + @arg(name = "bht-historyBits") historyBits: Option[Int], + @arg(name = "fetchWidth") fetchWidth: Int, + ) { + def convert: BTBParameter = BTBParameter( + useAsyncReset, + fetchBytes, + vaddrBits, + entries, + nMatchBits, + nPages, + nRAS, + cacheBlockBytes, + iCacheSet, + useCompressed, + updatesOutOfOrder, + fetchWidth, + (nEntries + .lazyZip(counterLength) + .lazyZip(historyLength) + .lazyZip(historyBits)) + .map { + case (nEntries, counterLength, historyLength, historyBits) => + BHTParameter(nEntries, counterLength, historyLength, historyBits) + } + .headOption + ) + } + + implicit def BTBParameterMainParser: ParserForClass[BTBParameterMain] = ParserForClass[BTBParameterMain] + + @main + def config(@arg(name = "parameter") parameter: BTBParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") 
runFirtool: mainargs.Flag) = + designImpl[BTB, BTBParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/BreakpointUnit.scala b/elaborator/src/rocketv/BreakpointUnit.scala new file mode 100644 index 000000000..445f1a2e7 --- /dev/null +++ b/elaborator/src/rocketv/BreakpointUnit.scala @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{BreakpointUnit, BreakpointUnitParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object BreakpointUnit extends Elaborator { + @main + case class BreakpointUnitParameterMain( + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "xLen") xLen: Int, + @arg(name = "useBPWatch") useBPWatch: Boolean, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int) { + def convert: BreakpointUnitParameter = + BreakpointUnitParameter(nBreakpoints, xLen, useBPWatch, vaddrBits, mcontextWidth, scontextWidth) + } + + implicit def BreakpointUnitParameterMainParser: ParserForClass[BreakpointUnitParameterMain] = + ParserForClass[BreakpointUnitParameterMain] + + @main + def config(@arg(name = "parameter") parameter: BreakpointUnitParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[BreakpointUnit, BreakpointUnitParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/CSR.scala b/elaborator/src/rocketv/CSR.scala new file mode 100644 index 000000000..17725a517 --- /dev/null +++ b/elaborator/src/rocketv/CSR.scala @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{CSR, CSRParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object CSR extends Elaborator { + @main + case class CSRParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "vLen") vLen: Int, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "hartIdLen") hartIdLen: Int, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "vmidBits") vmidBits: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "nPerfCounters") nPerfCounters: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "usingSupervisor") usingSupervisor: Boolean, + @arg(name = "usingFPU") usingFPU: Boolean, + @arg(name = "usingUser") usingUser: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingCompressed") usingCompressed: Boolean, + @arg(name = "usingAtomics") usingAtomics: Boolean, + @arg(name = "usingDebug") usingDebug: Boolean, + @arg(name = "usingMulDiv") usingMulDiv: Boolean, + @arg(name = "usingVector") usingVector: Boolean) { + def convert: CSRParameter = CSRParameter( + useAsyncReset: Boolean, + vLen: Int, + xLen: Int, + fLen: Int, + hartIdLen: Int, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + vmidBits: Int, + nPMPs: Int, + nPerfCounters: Int, + 
paddrBits: Int, + nBreakpoints: Int, + usingSupervisor: Boolean, + usingFPU: Boolean, + usingUser: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingAtomics: Boolean, + usingDebug: Boolean, + usingMulDiv: Boolean, + usingVector: Boolean + ) + } + + implicit def CSRParameterMainParser: ParserForClass[CSRParameterMain] = ParserForClass[CSRParameterMain] + + @main + def config(@arg(name = "parameter") parameter: CSRParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[CSR, CSRParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/DCache.scala b/elaborator/src/rocketv/DCache.scala new file mode 100644 index 000000000..f791bdfea --- /dev/null +++ b/elaborator/src/rocketv/DCache.scala @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{HellaCache, HellaCacheParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object DCache extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class DCacheParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "nWays") nWays: Int, + @arg(name = "nSets") nSets: Int, + @arg(name = "rowBits") rowBits: Int, + @arg(name = "nTLBSets") nTLBSets: Int, + @arg(name = "nTLBWays") nTLBWays: Int, + @arg(name = "tagECC") tagECC: Option[String], + @arg(name = "dataECC") dataECC: Option[String], + @arg(name = "maxUncachedInFlight") maxUncachedInFlight: Int, + @arg(name = "separateUncachedResp") separateUncachedResp: Boolean, + @arg(name = "legal") legal: BitSet, + @arg(name = "cacheable") cacheable: BitSet, + @arg(name = "read") read: BitSet, + @arg(name = "write") write: BitSet, + @arg(name = "putPartial") putPartial: BitSet, + @arg(name = "logic") logic: BitSet, + @arg(name = "arithmetic") arithmetic: BitSet, + @arg(name = "exec") exec: BitSet, + @arg(name = "sideEffects") sideEffects: BitSet) { + def convert: HellaCacheParameter = HellaCacheParameter( + useAsyncReset, + clockGate, + xLen, + fLen, + usingVM, + paddrBits, + cacheBlockBytes, + nWays, + nSets, + rowBits, + nTLBSets, + nTLBWays, + tagECC, + dataECC, + maxUncachedInFlight, + separateUncachedResp, + legal, + cacheable, + read, + write, + putPartial, + logic, + arithmetic, + exec, + sideEffects + ) + } + 
+ implicit def DCacheParameterMainParser: ParserForClass[DCacheParameterMain] = ParserForClass[DCacheParameterMain] + + @main + def config(@arg(name = "parameter") parameter: DCacheParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[HellaCache, HellaCacheParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/Decoder.scala b/elaborator/src/rocketv/Decoder.scala new file mode 100644 index 000000000..29f305d7e --- /dev/null +++ b/elaborator/src/rocketv/Decoder.scala @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{Decoder, DecoderParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object Decoder extends Elaborator { + @main + case class DecoderParameterMain( + @arg(name = "instructionSets") instructionSets: Set[String], + @arg(name = "pipelinedMul") pipelinedMul: Boolean, + @arg(name = "fenceIFlushDCache") fenceIFlushDCache: Boolean) { + def convert: DecoderParameter = DecoderParameter( + instructionSets, + pipelinedMul, + fenceIFlushDCache + ) + } + + implicit def DecoderParameterMainParser: ParserForClass[DecoderParameterMain] = ParserForClass[DecoderParameterMain] + + @main + def config(@arg(name = "parameter") parameter: DecoderParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[Decoder, DecoderParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/FPToFP.scala b/elaborator/src/rocketv/FPToFP.scala new file mode 100644 index 000000000..c35094b93 --- /dev/null +++ b/elaborator/src/rocketv/FPToFP.scala @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPToFP, FPToFPParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPToFP extends Elaborator { + @main + case class FPToFPParameterMain( + useAsyncReset: Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int) { + def convert: FPToFPParameter = FPToFPParameter( + useAsyncReset, + latency, + xLen, + fLen, + minFLen + ) + } + + implicit def FPToFPParameterMainParser: ParserForClass[FPToFPParameterMain] = ParserForClass[FPToFPParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPToFPParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPToFP, FPToFPParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/FPToInt.scala b/elaborator/src/rocketv/FPToInt.scala new file mode 100644 index 000000000..2ac447b02 --- /dev/null +++ b/elaborator/src/rocketv/FPToInt.scala @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPToInt, 
FPToIntParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPToInt extends Elaborator { + @main + case class FPToIntParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "minFLen") minFLen: Int) { + def convert: FPToIntParameter = FPToIntParameter( + useAsyncReset, + xLen, + fLen, + minFLen + ) + } + + implicit def FPToIntParameterMainParser: ParserForClass[FPToIntParameterMain] = ParserForClass[FPToIntParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPToIntParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPToInt, FPToIntParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/FPU.scala b/elaborator/src/rocketv/FPU.scala new file mode 100644 index 000000000..6a077b1b8 --- /dev/null +++ b/elaborator/src/rocketv/FPU.scala @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPU, FPUParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPU extends Elaborator { + @main + case class FPUParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "useClockGating") useClockGating: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "minFLen") minFLen: Int, + @arg(name = "sfmaLatency") sfmaLatency: Int, + @arg(name = "dfmaLatency") dfmaLatency: Int, + @arg(name = "divSqrt") divSqrt: Boolean, + @arg(name = "hartIdLen") hartIdLen: Int) { + def convert: FPUParameter = FPUParameter( + useAsyncReset, + useClockGating, + xLen, + fLen, + minFLen, + sfmaLatency, + dfmaLatency, + divSqrt, + hartIdLen + ) + } + + implicit def FPUParameterMainParser: ParserForClass[FPUParameterMain] = ParserForClass[FPUParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPUParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPU, FPUParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/FPUFMAPipe.scala b/elaborator/src/rocketv/FPUFMAPipe.scala new file mode 100644 index 000000000..5c2b15724 --- /dev/null +++ b/elaborator/src/rocketv/FPUFMAPipe.scala @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPUFMAPipe, FPUFMAPipeParameter, FType} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPUFMAPipe extends Elaborator { + @main + case class FPUFMAPipeParameterMain( + useAsyncReset: Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int, + t: String) { + + def convert: FPUFMAPipeParameter = FPUFMAPipeParameter( + useAsyncReset, + latency, + xLen, + fLen, + minFLen, + t match { + case s"e${exp}s${sig}" => FType(exp.toInt, sig.toInt) + case "h" => FType(5, 11) + case "s" => FType(8, 24) + case "d" => FType(11, 53) + } + ) + } + + implicit def FPUFMAPipeParameterMainParser: 
ParserForClass[FPUFMAPipeParameterMain] = + ParserForClass[FPUFMAPipeParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPUFMAPipeParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPUFMAPipe, FPUFMAPipeParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/Frontend.scala b/elaborator/src/rocketv/Frontend.scala new file mode 100644 index 000000000..c36c94993 --- /dev/null +++ b/elaborator/src/rocketv/Frontend.scala @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{BHTParameter, Frontend, FrontendParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object Frontend extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class FrontendParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "usingAtomics") usingAtomics: Boolean, + @arg(name = "usingDataScratchpad") usingDataScratchpad: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingCompressed") usingCompressed: Boolean, + @arg(name = "usingBTB") usingBTB: Boolean, + @arg(name = "itlbNSets") itlbNSets: Int, + @arg(name = "itlbNWays") itlbNWays: Int, + @arg(name = "itlbNSectors") itlbNSectors: Int, + @arg(name = "itlbNSuperpageEntries") itlbNSuperpageEntries: Int, + @arg(name = "blockBytes") blockBytes: Int, + @arg(name = "iCacheNSets") iCacheNSets: Int, + @arg(name = "iCacheNWays") iCacheNWays: Int, + @arg(name = "iCachePrefetch") iCachePrefetch: Boolean, + @arg(name = "btbEntries") btbEntries: Int, + @arg(name = "btbNMatchBits") btbNMatchBits: Int, + @arg(name = "btbUpdatesOutOfOrder") btbUpdatesOutOfOrder: Boolean, + @arg(name = "nPages") nPages: Int, + @arg(name = "nRAS") nRAS: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "pgLevels") pgLevels: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "bhtNEntries") bhtNEntries: Option[Int], + @arg(name = "bhtCounterLength") bhtCounterLength: Option[Int], + @arg(name = "bhtHistoryLength") bhtHistoryLength: Option[Int], + @arg(name = "bhtHistoryBits") bhtHistoryBits: Option[Int], + @arg(name = "legal") legal: Seq[BitSet], + @arg(name = "cacheable") cacheable: Seq[BitSet], + @arg(name = "read") read: Seq[BitSet], + @arg(name = "write") write: Seq[BitSet], + @arg(name = "putPartial") putPartial: Seq[BitSet], + @arg(name = "logic") logic: Seq[BitSet], + @arg(name = 
"arithmetic") arithmetic: Seq[BitSet], + @arg(name = "exec") exec: Seq[BitSet], + @arg(name = "sideEffects") sideEffects: Seq[BitSet]) { + def convert: FrontendParameter = FrontendParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingBTB: Boolean, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + blockBytes: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + nPMPs: Int, + paddrBits: Int, + pgLevels: Int, + asidBits: Int, + bhtNEntries + .lazyZip(bhtCounterLength) + .lazyZip(bhtHistoryLength) + .lazyZip(bhtHistoryBits) + .map { + case (bhtNEntries, bhtCounterLength, bhtHistoryLength, bhtHistoryBits) => + BHTParameter(bhtNEntries, bhtCounterLength, bhtHistoryLength, bhtHistoryBits) + } + .headOption, + legal.foldLeft(BitSet.empty)(_.union(_)), + cacheable.foldLeft(BitSet.empty)(_.union(_)), + read.foldLeft(BitSet.empty)(_.union(_)), + write.foldLeft(BitSet.empty)(_.union(_)), + putPartial.foldLeft(BitSet.empty)(_.union(_)), + logic.foldLeft(BitSet.empty)(_.union(_)), + arithmetic.foldLeft(BitSet.empty)(_.union(_)), + exec.foldLeft(BitSet.empty)(_.union(_)), + sideEffects.foldLeft(BitSet.empty)(_.union(_)) + ) + } + + implicit def FrontendParameterMainParser: ParserForClass[FrontendParameterMain] = + ParserForClass[FrontendParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FrontendParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[Frontend, FrontendParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/IBuf.scala b/elaborator/src/rocketv/IBuf.scala new file mode 100644 index 000000000..1e2ac17da --- /dev/null +++ b/elaborator/src/rocketv/IBuf.scala @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{IBuf, IBufParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object IBuf extends Elaborator { + @main + case class IBufParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "usingCompressed") usingCompressed: Boolean, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "entries") entries: Int, + @arg(name = "vaddrBitsExtended") vaddrBitsExtended: Int, + @arg(name = "bhtHistoryLength") bhtHistoryLength: Option[Int], + @arg(name = "bhtCounterLength") bhtCounterLength: Option[Int], + @arg(name = "fetchWidth") fetchWidth: Int) { + def convert: IBufParameter = IBufParameter( + useAsyncReset, + xLen, + usingCompressed, + vaddrBits, + entries, + vaddrBitsExtended, + bhtHistoryLength, + bhtCounterLength, + fetchWidth + ) + } + + implicit def IBufParameterMainParser: ParserForClass[IBufParameterMain] = ParserForClass[IBufParameterMain] + + @main + def config(@arg(name = "parameter") parameter: IBufParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[IBuf, IBufParameter](parameter, runFirtool.value) + 
+ def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/ICache.scala b/elaborator/src/rocketv/ICache.scala new file mode 100644 index 000000000..e80b84360 --- /dev/null +++ b/elaborator/src/rocketv/ICache.scala @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{ICache, ICacheParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object ICache extends Elaborator { + @main + case class ICacheParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "prefetch") prefetch: Boolean, + @arg(name = "nSets") nSets: Int, + @arg(name = "nWays") nWays: Int, + @arg(name = "blockBytes") blockBytes: Int, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "paddrBits") paddrBits: Int) { + def convert: ICacheParameter = ICacheParameter( + useAsyncReset, + prefetch, + nSets, + nWays, + blockBytes, + usingVM, + vaddrBits, + paddrBits + ) + } + + implicit def ICacheParameterMainParser: ParserForClass[ICacheParameterMain] = ParserForClass[ICacheParameterMain] + + @main + def config(@arg(name = "parameter") parameter: ICacheParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[ICache, ICacheParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/IntToFP.scala b/elaborator/src/rocketv/IntToFP.scala new file mode 100644 index 000000000..b195becb7 --- /dev/null +++ b/elaborator/src/rocketv/IntToFP.scala @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{IntToFP, IntToFPParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object IntToFP extends Elaborator { + @main + case class IntToFPParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "xLen") xLen: Int, + @arg(name = "minFLen") minFLen: Int) { + def convert: IntToFPParameter = IntToFPParameter( + useAsyncReset, + latency, + fLen, + xLen, + minFLen + ) + } + + implicit def IntToFPParameterMainParser: ParserForClass[IntToFPParameterMain] = ParserForClass[IntToFPParameterMain] + + @main + def config(@arg(name = "parameter") parameter: IntToFPParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[IntToFP, IntToFPParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/MulAddRecFNPipe.scala b/elaborator/src/rocketv/MulAddRecFNPipe.scala new file mode 100644 index 000000000..741567250 --- /dev/null +++ b/elaborator/src/rocketv/MulAddRecFNPipe.scala @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{MulAddRecFNPipe, MulAddRecFNPipeParameter} +import org.chipsalliance.t1.elaborator.Elaborator + 
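+// Editorial note: expWidth/sigWidth here follow the recoded floating-point
+// convention used elsewhere in this patch; the concrete pairs from the FType
+// match in FPUFMAPipe.scala are (5, 11) for half, (8, 24) for single and
+// (11, 53) for double precision.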
+object MulAddRecFNPipe extends Elaborator { + @main + case class MulAddRecFNPipeParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "expWidth") expWidth: Int, + @arg(name = "sigWidth") sigWidth: Int) { + def convert: MulAddRecFNPipeParameter = MulAddRecFNPipeParameter(useAsyncReset, latency, expWidth, sigWidth) + } + + implicit def MulAddRecFNPipeParameterMainParser: ParserForClass[MulAddRecFNPipeParameterMain] = + ParserForClass[MulAddRecFNPipeParameterMain] + + @main + def config(@arg(name = "parameter") parameter: MulAddRecFNPipeParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[MulAddRecFNPipe, MulAddRecFNPipeParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/MulDiv.scala b/elaborator/src/rocketv/MulDiv.scala new file mode 100644 index 000000000..8b2814490 --- /dev/null +++ b/elaborator/src/rocketv/MulDiv.scala @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{MulDiv, MulDivParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object MulDiv extends Elaborator { + @main + case class MulDivParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "width") width: Int, + @arg(name = "divUnroll") divUnroll: Int, + @arg(name = "divEarlyOut") divEarlyOut: Boolean, + @arg(name = "divEarlyOutGranularity") divEarlyOutGranularity: Int, + @arg(name = "mulUnroll") mulUnroll: Int, + @arg(name = "mulEarlyOut") mulEarlyOut: Boolean) { + def convert: MulDivParameter = MulDivParameter( + useAsyncReset, + latency, + width, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut + ) + } + + implicit def MulDivParameterMainParser: ParserForClass[MulDivParameterMain] = ParserForClass[MulDivParameterMain] + + @main + def config(@arg(name = "parameter") parameter: MulDivParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[MulDiv, MulDivParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/PMAChecker.scala b/elaborator/src/rocketv/PMAChecker.scala new file mode 100644 index 000000000..571acd126 --- /dev/null +++ b/elaborator/src/rocketv/PMAChecker.scala @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{PMAChecker, PMACheckerParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PMAChecker extends Elaborator { + + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right(strs.head.split(",").map{ opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, 
range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + }.reduce(_.union(_))) + } + } + @main + case class PMACheckerParameterMain( + paddrBits: Int, + legal: Seq[BitSet], + cacheable: Seq[BitSet], + read: Seq[BitSet], + write: Seq[BitSet], + putPartial: Seq[BitSet], + logic: Seq[BitSet], + arithmetic: Seq[BitSet], + exec: Seq[BitSet], + sideEffects: Seq[BitSet]) { + def convert: PMACheckerParameter = PMACheckerParameter( + paddrBits, + legal.foldLeft(BitSet.empty)(_.union(_)), + cacheable.foldLeft(BitSet.empty)(_.union(_)), + read.foldLeft(BitSet.empty)(_.union(_)), + write.foldLeft(BitSet.empty)(_.union(_)), + putPartial.foldLeft(BitSet.empty)(_.union(_)), + logic.foldLeft(BitSet.empty)(_.union(_)), + arithmetic.foldLeft(BitSet.empty)(_.union(_)), + exec.foldLeft(BitSet.empty)(_.union(_)), + sideEffects.foldLeft(BitSet.empty)(_.union(_)) + ) + } + + implicit def PMACheckerParameterMainParser: ParserForClass[PMACheckerParameterMain] = + ParserForClass[PMACheckerParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PMACheckerParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PMAChecker, PMACheckerParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/PMP.scala b/elaborator/src/rocketv/PMP.scala new file mode 100644 index 000000000..bb8e69c10 --- /dev/null +++ b/elaborator/src/rocketv/PMP.scala @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{PMPChecker, PMPCheckerParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PMPChecker extends Elaborator { + @main + case class PMPCheckerParameterMain( + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "lgMaxSize") lgMaxSize: Int, + @arg(name = "pmpGranularity") pmpGranularity: Int) { + def convert: PMPCheckerParameter = PMPCheckerParameter( + nPMPs: Int, + paddrBits: Int, + lgMaxSize: Int, + pmpGranularity: Int + ) + } + + implicit def PMPCheckerParameterMainParser: ParserForClass[PMPCheckerParameterMain] = + ParserForClass[PMPCheckerParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PMPCheckerParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PMPChecker, PMPCheckerParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/PTW.scala b/elaborator/src/rocketv/PTW.scala new file mode 100644 index 000000000..4a20e110f --- /dev/null +++ b/elaborator/src/rocketv/PTW.scala @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{PTW, PTWParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PTW extends Elaborator { + @main + case class PTWParameterMain( 
+ @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "hasClockGate") hasClockGate: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingHypervisor") usingHypervisor: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "pgLevels") pgLevels: Int, + @arg(name = "nPTECacheEntries") nPTECacheEntries: Int, + @arg(name = "nL2TLBWays") nL2TLBWays: Int, + @arg(name = "nL2TLBEntries") nL2TLBEntries: Int, + @arg(name = "nPMPs") nPMPs: Int) { + def convert: PTWParameter = PTWParameter( + useAsyncReset, + hasClockGate, + usingVM, + usingHypervisor, + xLen, + fLen, + paddrBits, + asidBits, + pgLevels, + nPTECacheEntries, + nL2TLBWays, + nL2TLBEntries, + nPMPs + ) + } + + implicit def PTWParameterMainParser: ParserForClass[PTWParameterMain] = ParserForClass[PTWParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PTWParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PTW, PTWParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/PipelinedMultiplier.scala b/elaborator/src/rocketv/PipelinedMultiplier.scala new file mode 100644 index 000000000..9ef2cb47b --- /dev/null +++ b/elaborator/src/rocketv/PipelinedMultiplier.scala @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{PipelinedMultiplier, PipelinedMultiplierParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PipelinedMultiplier extends Elaborator { + @main + case class PipelinedMultiplierParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "width") width: Int) { + def convert: PipelinedMultiplierParameter = PipelinedMultiplierParameter( + useAsyncReset: Boolean, + latency: Int, + width: Int + ) + } + + implicit def PipelinedMultiplierParameterMainParser: ParserForClass[PipelinedMultiplierParameterMain] = ParserForClass[PipelinedMultiplierParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PipelinedMultiplierParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PipelinedMultiplier, PipelinedMultiplierParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/RVCExpander.scala b/elaborator/src/rocketv/RVCExpander.scala new file mode 100644 index 000000000..2cf3aa7df --- /dev/null +++ b/elaborator/src/rocketv/RVCExpander.scala @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{RVCExpander, RVCExpanderParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object RVCExpander extends Elaborator { + @main + case class RVCExpanderParameterMain( + @arg(name = "xLen") xLen: Int, + @arg(name = "usingCompressed") usingCompressed: Boolean) { + def convert: RVCExpanderParameter = RVCExpanderParameter( + xLen, 
+ usingCompressed + ) + } + + implicit def RVCExpanderParameterMainParser: ParserForClass[RVCExpanderParameterMain] = + ParserForClass[RVCExpanderParameterMain] + + @main + def config(@arg(name = "parameter") parameter: RVCExpanderParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[RVCExpander, RVCExpanderParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/Rocket.scala b/elaborator/src/rocketv/Rocket.scala new file mode 100644 index 000000000..f5954500e --- /dev/null +++ b/elaborator/src/rocketv/Rocket.scala @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{Rocket, RocketParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object Rocket extends Elaborator { + @main + case class RocketParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = "instructionSets") instructionSets: Set[String], + @arg(name = "vLen") vLen: Int, + @arg(name = "usingUser") usingUser: Boolean, + @arg(name = "hartIdLen") hartIdLen: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "usingBTB") usingBTB: Boolean, + @arg(name = "useBPWatch") useBPWatch: Boolean, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int, + @arg(name = "mulDivLatency") mulDivLatency: Int, + @arg(name = "divUnroll") divUnroll: Int, + @arg(name = "divEarlyOut") divEarlyOut: Boolean, + @arg(name = "divEarlyOutGranularity") divEarlyOutGranularity: Int, + @arg(name = "mulUnroll") mulUnroll: Int, + @arg(name = "mulEarlyOut") mulEarlyOut: Boolean, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "hasBeu") hasBeu: Boolean, + @arg(name = "fastLoadByte") fastLoadByte: Boolean, + @arg(name = "fastLoadWord") fastLoadWord: Boolean, + @arg(name = "dcacheNSets") dcacheNSets: Int, + @arg(name = "flushOnFenceI") flushOnFenceI: Boolean, + @arg(name = "usingT1") usingT1: Boolean) { + def convert: RocketParameter = RocketParameter( + useAsyncReset, + clockGate, + instructionSets, + vLen, + usingUser, + hartIdLen, + nPMPs, + asidBits, + nBreakpoints, + usingBTB, + useBPWatch, + mcontextWidth, + scontextWidth, + mulDivLatency, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut, + paddrBits, + cacheBlockBytes, + hasBeu, + fastLoadByte, + fastLoadWord, + dcacheNSets, + flushOnFenceI, + usingT1 + ) + } + + implicit def RocketParameterMainParser: ParserForClass[RocketParameterMain] = ParserForClass[RocketParameterMain] + + @main + def config(@arg(name = "parameter") parameter: RocketParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[Rocket, RocketParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/RocketTile.scala b/elaborator/src/rocketv/RocketTile.scala new file mode 100644 index 000000000..753d026a9 --- /dev/null +++
b/elaborator/src/rocketv/RocketTile.scala @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{BHTParameter, RocketTile, RocketTileParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +// --useAsyncReset true --clockGate true --instructionSets rv32_i --priv m --hartIdLen 4 --useBPWatch false --mcontextWidth 0 --scontextWidth 0 --asidBits 0 --resetVectorBits 32 --nBreakpoints 0 --dtlbNWays 0 --dtlbNSets 0 --itlbNSets 0 --itlbNWays 0 --itlbNSectors 0 --itlbNSuperpageEntries 0 --nPTECacheEntries 0 --nL2TLBWays 0 --nL2TLBEntries 0 --paddrBits 32 --cacheBlockBytes 32 --nPMPs 8 --legal 00000000-ffffffff --cacheable 80000000-ffffffff --read 00000000-ffffffff --write 00000000-ffffffff --putPartial 00000000-ffffffff --logic 0 --arithmetic 0 --exec 80000000-ffffffff --sideEffects 00000000-3fffffff --btbEntries 28 --btbNMatchBits 14 --btbUpdatesOutOfOrder false --nPages 6 --nRAS 6 --bhtNEntries 512 --bhtCounterLength 1 --bhtHistoryLength 8 --bhtHistoryBits 3 --mulDivLatency 2 --divUnroll 1 --divEarlyOut false --divEarlyOutGranularity 0 --mulUnroll 1 --mulEarlyOut false --sfmaLatency 3 --dfmaLatency 3 --divSqrt true --flushOnFenceI true --fastLoadByte false --fastLoadWord false --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --maxUncachedInFlight 1 --separateUncachedResp false --iCacheNSets 64 --iCacheNWays 4 --iCachePrefetch false + +object RocketTile extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class RocketTileParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = "instructionSets") instructionSets: Set[String], + @arg(name = "priv") priv: String, + @arg(name = "hartIdLen") hartIdLen: Int, + @arg(name = "useBPWatch") useBPWatch: Boolean, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "resetVectorBits") resetVectorBits: Int, + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "dtlbNWays") dtlbNWays: Int, + @arg(name = "dtlbNSets") dtlbNSets: Int, + @arg(name = "itlbNSets") itlbNSets: Int, + @arg(name = "itlbNWays") itlbNWays: Int, + @arg(name = "itlbNSectors") itlbNSectors: Int, + @arg(name = "itlbNSuperpageEntries") itlbNSuperpageEntries: Int, + @arg(name = "nPTECacheEntries") nPTECacheEntries: Int, + @arg(name = "nL2TLBWays") nL2TLBWays: Int, + @arg(name = "nL2TLBEntries") nL2TLBEntries: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "legal") legal: BitSet, + @arg(name = "cacheable") cacheable: 
BitSet, + @arg(name = "read") read: BitSet, + @arg(name = "write") write: BitSet, + @arg(name = "putPartial") putPartial: BitSet, + @arg(name = "logic") logic: BitSet, + @arg(name = "arithmetic") arithmetic: BitSet, + @arg(name = "exec") exec: BitSet, + @arg(name = "sideEffects") sideEffects: BitSet, + @arg(name = "btbEntries") btbEntries: Int, + @arg(name = "btbNMatchBits") btbNMatchBits: Int, + @arg(name = "btbUpdatesOutOfOrder") btbUpdatesOutOfOrder: Boolean, + @arg(name = "nPages") nPages: Int, + @arg(name = "nRAS") nRAS: Int, + @arg(name = "bhtNEntries") bhtNEntries: Option[Int], + @arg(name = "bhtCounterLength") bhtCounterLength: Option[Int], + @arg(name = "bhtHistoryLength") bhtHistoryLength: Option[Int], + @arg(name = "bhtHistoryBits") bhtHistoryBits: Option[Int], + @arg(name = "mulDivLatency") mulDivLatency: Int, + @arg(name = "divUnroll") divUnroll: Int, + @arg(name = "divEarlyOut") divEarlyOut: Boolean, + @arg(name = "divEarlyOutGranularity") divEarlyOutGranularity: Int, + @arg(name = "mulUnroll") mulUnroll: Int, + @arg(name = "mulEarlyOut") mulEarlyOut: Boolean, + @arg(name = "sfmaLatency") sfmaLatency: Int, + @arg(name = "dfmaLatency") dfmaLatency: Int, + @arg(name = "divSqrt") divSqrt: Boolean, + @arg(name = "flushOnFenceI") flushOnFenceI: Boolean, + @arg(name = "fastLoadByte") fastLoadByte: Boolean, + @arg(name = "fastLoadWord") fastLoadWord: Boolean, + @arg(name = "dcacheNSets") dcacheNSets: Int, + @arg(name = "dcacheNWays") dcacheNWays: Int, + @arg(name = "dcacheRowBits") dcacheRowBits: Int, + @arg(name = "maxUncachedInFlight") maxUncachedInFlight: Int, + @arg(name = "separateUncachedResp") separateUncachedResp: Boolean, + @arg(name = "iCacheNSets") iCacheNSets: Int, + @arg(name = "iCacheNWays") iCacheNWays: Int, + @arg(name = "iCachePrefetch") iCachePrefetch: Boolean) { + def convert: RocketTileParameter = RocketTileParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + instructionSets: Set[String], + priv: String, + hartIdLen: Int, + useBPWatch: Boolean, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + resetVectorBits: Int, + nBreakpoints: Int, + dtlbNWays: Int, + dtlbNSets: Int, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + paddrBits: Int, + cacheBlockBytes: Int, + nPMPs: Int, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + bhtNEntries + .lazyZip(bhtCounterLength) + .lazyZip(bhtHistoryLength) + .lazyZip(bhtHistoryBits) + .map { + case ( + bhtNEntries, + bhtCounterLength, + bhtHistoryLength, + bhtHistoryBits + ) => + BHTParameter(bhtNEntries, bhtCounterLength, bhtHistoryLength, bhtHistoryBits) + } + .headOption: Option[BHTParameter], + mulDivLatency: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + flushOnFenceI: Boolean, + fastLoadByte: Boolean, + fastLoadWord: Boolean, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean + ) + } + + implicit def RocketTileParameterMainParser: ParserForClass[RocketTileParameterMain] = + 
ParserForClass[RocketTileParameterMain] + + @main + def config(@arg(name = "parameter") parameter: RocketTileParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[RocketTile, RocketTileParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/TLB.scala b/elaborator/src/rocketv/TLB.scala new file mode 100644 index 000000000..1ee1ff458 --- /dev/null +++ b/elaborator/src/rocketv/TLB.scala @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{TLB, TLBParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object TLB extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class TLBParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "nSets") nSets: Int, + @arg(name = "nWays") nWays: Int, + @arg(name = "nSectors") nSectors: Int, + @arg(name = "nSuperpageEntries") nSuperpageEntries: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "pgLevels") pgLevels: Int, + @arg(name = "usingHypervisor") usingHypervisor: Boolean, + @arg(name = "usingAtomics") usingAtomics: Boolean, + @arg(name = "usingDataScratchpad") usingDataScratchpad: Boolean, + @arg(name = "usingAtomicsOnlyForIO") usingAtomicsOnlyForIO: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingAtomicsInCache") usingAtomicsInCache: Boolean, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "legal") legal: Seq[BitSet], + @arg(name = "cacheable") cacheable: Seq[BitSet], + @arg(name = "read") read: Seq[BitSet], + @arg(name = "write") write: Seq[BitSet], + @arg(name = "putPartial") putPartial: Seq[BitSet], + @arg(name = "logic") logic: Seq[BitSet], + @arg(name = "arithmetic") arithmetic: Seq[BitSet], + @arg(name = "exec") exec: Seq[BitSet], + @arg(name = "sideEffects") sideEffects: Seq[BitSet], + @arg(name = "isITLB") isITLB: Boolean) { + def convert: TLBParameter = TLBParameter( + useAsyncReset, + xLen, + nSets, + nWays, + nSectors, + nSuperpageEntries, + asidBits, + pgLevels, + usingHypervisor, + usingAtomics, + usingDataScratchpad, + usingAtomicsOnlyForIO, + usingVM, + usingAtomicsInCache, + nPMPs, + PMAChecker.PMACheckerParameterMain( + paddrBits, + legal, + cacheable, + read, + write, + putPartial, + logic, + arithmetic, + exec, + sideEffects + ).convert, + paddrBits, + isITLB + ) + } + + implicit def TLBParameterMainParser: ParserForClass[TLBParameterMain] = ParserForClass[TLBParameterMain] + + 
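+  // Editorial note: the PMA-related flags above are not consumed directly by the
+  // TLB; convert funnels them through PMAChecker.PMACheckerParameterMain so this
+  // elaborator and the standalone PMAChecker share a single interpretation of the
+  // comma-separated BitSet syntax (see BitSetRead at the top of this file).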
@main + def config(@arg(name = "parameter") parameter: TLBParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[TLB, TLBParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/t1rocket/T1RocketTile.scala b/elaborator/src/t1rocket/T1RocketTile.scala new file mode 100644 index 000000000..3cb8398e2 --- /dev/null +++ b/elaborator/src/t1rocket/T1RocketTile.scala @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.t1rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.t1.elaborator.Elaborator +import org.chipsalliance.t1.rtl.vrf.RamType +import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw} +import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter} + +// --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_c --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w +object T1RocketTile extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + implicit object RamTypeRead extends TokensReader.Simple[RamType] { + def shortName = "ramtype" + def read(strs: Seq[String]) = { + Right( + strs.head match { + case "p0rw" => p0rw + case "p0rp1w" => p0rp1w + case "p0rwp1rw" => p0rwp1rw + } + ) + } + } + + @main + case class T1RocketTileParameterMain( + @arg(name = "instructionSets") instructionSets: Seq[String], + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "cacheable") cacheable: BitSet, + @arg(name = "sideEffects") sideEffects: BitSet, + @arg(name = "dcacheNSets") dcacheNSets: Int, + @arg(name = "dcacheNWays") dcacheNWays: Int, + @arg(name = "dcacheRowBits") dcacheRowBits: Int, + @arg(name = "iCacheNSets") iCacheNSets: Int, + @arg(name = "iCacheNWays") iCacheNWays: Int, + @arg(name = "iCachePrefetch") iCachePrefetch: Boolean, + @arg(name = "dLen") dLen: Int, + @arg(name = "vrfBankSize") vrfBankSize: Int, + @arg(name = "vrfRamType") vrfRamType: RamType + ) { + def convert: T1RocketTileParameter = T1RocketTileParameter( + instructionSets: Seq[String], + cacheBlockBytes: Int, + nPMPs: Int, + cacheable: BitSet, + sideEffects: BitSet, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + dLen: Int, + vrfBankSize: Int, + vrfRamType: RamType + ) + } + 
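+  // Editorial sketch of the --vrfRamType values accepted by RamTypeRead above,
+  // reading the RamType names as VRF port lists (an interpretation, not a spec):
+  //   p0rw     -- one shared read/write port
+  //   p0rp1w   -- a dedicated read port plus a dedicated write port
+  //   p0rwp1rw -- two read/write ports
+  // Any other string falls through the match and fails with a MatchError.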
+ implicit def T1RocketTileParameterMainParser: ParserForClass[T1RocketTileParameterMain] = + ParserForClass[T1RocketTileParameterMain] + + @main + def config(@arg(name = "parameter") parameter: T1RocketTileParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[T1RocketTile, T1RocketTileParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/ipemu/src/AXI4SlaveAgent.scala b/ipemu/src/AXI4SlaveAgent.scala index 23b814cde..963b04ebf 100644 --- a/ipemu/src/AXI4SlaveAgent.scala +++ b/ipemu/src/AXI4SlaveAgent.scala @@ -30,6 +30,7 @@ class WritePayload(length: Int, dataWidth: Int) extends Bundle { val strb = Vec(length, UInt(math.max(8, dataWidth / 8).W)) } +// TODO: consider adding the latency of the read transaction class ReadPayload(length: Int,dataWidth: Int) extends Bundle { val data = Vec(length, UInt(dataWidth.W)) } @@ -121,7 +122,8 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) channel.BVALID := awExist && wExist channel.BID := Mux(awIssued, awid, channel.AWID) channel.BRESP := 0.U(2.W) // OK - channel.BUSER := Mux(awIssued, awuser, channel.AWUSER) + channel.BUSER := DontCare + // TODO: add latency to the write transaction reply when(channel.BVALID && channel.BREADY) { RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( io.clock, diff --git a/ipemu/src/TestBench.scala b/ipemu/src/TestBench.scala index 21c3ad75a..c3c96b318 100644 --- a/ipemu/src/TestBench.scala +++ b/ipemu/src/TestBench.scala @@ -190,32 +190,24 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) // Events for difftest and performance modeling - val laneProbes = dut.io.laneProbes.zipWithIndex.map { - case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") - wire := probe.read(p) - wire + // Probes + val laneProbes = t1Probe.laneProbes.zipWithIndex.map { + case (lane, i) => lane.suggestName(s"lane${i}Probe") } - val lsuProbe = probe.read(dut.io.lsuProbe).suggestName("lsuProbe") + val lsuProbe = t1Probe.lsuProbe.suggestName("lsuProbe") val storeUnitProbe = lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") val otherUnitProbe = lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") - val laneVrfProbes = dut.io.laneVrfProbes.zipWithIndex.map { - case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") - wire := probe.read(p) - wire - } - // vrf write - laneVrfProbes.zipWithIndex.foreach { + laneProbes.zipWithIndex.foreach { case (lane, i) => - when(lane.valid)( + val vrf = lane.vrfProbe.suggestName(s"lane${i}VrfProbe") + when(vrf.valid)( printf( - cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" + cf"""{"event":"VrfWrite","issue_idx":${vrf.requestInstruction},"vd":${vrf.requestVd},"offset":${vrf.requestOffset},"mask":"${vrf.requestMask}%x","data":"${vrf.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" ) ) } diff --git a/nix/overlay.nix b/nix/overlay.nix index 9306cffd7..bd590d879 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -19,7 +19,14 @@ rec { dramsim3 = final.callPackage ./pkgs/dramsim3.nix { }; libspike = final.callPackage ./pkgs/libspike.nix { }; libspike_interfaces = final.callPackage ../difftest/spike_interfaces { }; - buddy-mlir = 
final.callPackage ./pkgs/buddy-mlir.nix { }; + + # DynamoCompiler doesn't support Python 3.12+ yet + buddy-mlir = final.callPackage ./pkgs/buddy-mlir.nix { python3 = final.python311; }; + buddy-mlir-pyenv = final.buddy-mlir.pythonModule.withPackages (ps: [ + final.buddy-mlir + ps.torch + ]); + fetchMillDeps = final.callPackage ./pkgs/mill-builder.nix { }; circt-full = final.callPackage ./pkgs/circt-full.nix { }; rvv-codegen = final.callPackage ./pkgs/rvv-codegen.nix { }; @@ -90,5 +97,36 @@ rec { }; }; + riscv-tests = final.pkgsCross.riscv32-embedded.stdenv.mkDerivation rec { + pname = "riscv-tests"; + version = "7878085d2546af0eb7af72a1df00996d5d8c43fb"; + src = final.fetchFromGitHub { + owner = "riscv-software-src"; + repo = "riscv-tests"; + rev = "${version}"; + hash = "sha256-CruSrXVO5Qlk63HPBVbwzl/RdxAAl2bknWawDHJwEKY="; + }; + + postUnpack = '' + rm -rf $sourceRoot/env + cp -r ${../tests/riscv-test-env} $sourceRoot/env + ''; + + enableParallelBuilding = true; + + configureFlags = [ + # to match rocket-tools path + "--prefix=${placeholder "out"}/riscv32-unknown-elf" + ]; + buildPhase = "make RISCV_PREFIX=riscv32-none-elf-"; + installPhase = '' + runHook preInstall + make install + mkdir -p $out/debug/ + cp debug/*.py $out/debug/ + runHook postInstall + ''; + }; + t1 = final.callPackage ./t1 { }; } diff --git a/nix/pkgs/buddy-llvm.nix b/nix/pkgs/buddy-llvm.nix new file mode 100644 index 000000000..af5bc1c86 --- /dev/null +++ b/nix/pkgs/buddy-llvm.nix @@ -0,0 +1,76 @@ +{ stdenv +, cmake +, ninja +, python3 +, fetchFromGitHub +}: + +let + pythonEnv = python3.withPackages (ps: [ + ps.numpy + ps.pybind11 + ps.pyyaml + ps.ml-dtypes + ]); +in +stdenv.mkDerivation rec { + name = "llvm-for-buddy-mlir"; + version = "6c59f0e1b0fb56c909ad7c9aad4bde37dc006ae0"; + src = fetchFromGitHub { + owner = "llvm"; + repo = "llvm-project"; + rev = version; + hash = "sha256-bMJJ2q1hSh7m0ewclHOmIe7lOHv110rz/P7D3pw8Uiw="; + }; + + requiredSystemFeatures = [ "big-parallel" ]; + + propagatedBuildInputs = [ + pythonEnv + ]; + + nativeBuildInputs = [ + cmake + ninja + ]; + + cmakeDir = "../llvm"; + cmakeFlags = [ + "-DLLVM_ENABLE_PROJECTS=mlir" + "-DLLVM_TARGETS_TO_BUILD=host;RISCV" + "-DLLVM_ENABLE_ASSERTIONS=ON" + "-DCMAKE_BUILD_TYPE=Release" + # required for the MLIR python bindings + "-DMLIR_ENABLE_BINDINGS_PYTHON=ON" + # required for LLVM utilities like `not` and `FileCheck` + "-DLLVM_INSTALL_UTILS=ON" + ]; + + outputs = [ "out" "lib" "dev" ]; + + postInstall = '' + # buddy-mlir has a custom RVV backend that requires the LLVM backend, + # and those LLVM backend headers need this config.h header file. + # However, LLVM treats config.h as build-phase-only, + # so `cmake install` does not install it.
+ # We have to work around this by copying it into $dev manually: + cp -v "include/llvm/Config/config.h" "$dev/include/llvm/Config/config.h" + + # move llvm-config to $dev to resolve a circular dependency + moveToOutput "bin/llvm-config*" "$dev" + + # move all lib files to $lib except lib/cmake + moveToOutput "lib" "$lib" + moveToOutput "lib/cmake" "$dev" + + # patch configuration files so each path points to the new $lib or $dev paths + substituteInPlace "$dev/lib/cmake/llvm/LLVMConfig.cmake" \ + --replace 'set(LLVM_BINARY_DIR "''${LLVM_INSTALL_PREFIX}")' 'set(LLVM_BINARY_DIR "'"$lib"'")' + substituteInPlace \ + "$dev/lib/cmake/llvm/LLVMExports-release.cmake" \ + "$dev/lib/cmake/mlir/MLIRTargets-release.cmake" \ + --replace "\''${_IMPORT_PREFIX}/lib/lib" "$lib/lib/lib" \ + --replace "\''${_IMPORT_PREFIX}/lib/objects-Release" "$lib/lib/objects-Release" \ + --replace "$out/bin/llvm-config" "$dev/bin/llvm-config" # patch path for llvm-config + ''; +} diff --git a/nix/pkgs/buddy-mlir.nix b/nix/pkgs/buddy-mlir.nix index 8bd03422f..4ec3a4ff9 100644 --- a/nix/pkgs/buddy-mlir.nix +++ b/nix/pkgs/buddy-mlir.nix @@ -1,14 +1,16 @@ -{ cmake, ninja, python3, llvmPackages_17, fetchFromGitHub, fetchpatch }: +{ cmake +, ninja +, llvmPackages_17 +, fetchFromGitHub +, fetchpatch +, python3 +, callPackage +}: let stdenv = llvmPackages_17.stdenv; bintools = llvmPackages_17.bintools; - buddy-llvm = fetchFromGitHub { - owner = "llvm"; - repo = "llvm-project"; - rev = "6c59f0e1b0fb56c909ad7c9aad4bde37dc006ae0"; - hash = "sha256-bMJJ2q1hSh7m0ewclHOmIe7lOHv110rz/P7D3pw8Uiw="; - }; + buddy-llvm = callPackage ./buddy-llvm.nix { inherit stdenv python3; }; in stdenv.mkDerivation { pname = "buddy-mlir"; @@ -17,47 +19,39 @@ stdenv.mkDerivation { src = fetchFromGitHub { owner = "buddy-compiler"; repo = "buddy-mlir"; - rev = "ec8a17969b645f0a0c1a822ffb04192b236b5c88"; - hash = "sha256-3ecxei/nkx8sjgVkeQvZMaxr1CQXwhTz8aY1e0I3zBA="; + rev = "d7d90a488ac0d6fc1e700e932f842c7b2bcad816"; + hash = "sha256-MhykCa6Z7Z8PpAlNh+vMuWYEOZZDyWhtMzMnFlNbGIk="; }; - unpackPhase = '' - # We can only use one-step build now...buddy-mlir have bad build system that always - # assume the build artifacts are inside of the LLVM sources. And it also relies on - # some LLVM Cpp source that are configured to be installed by default. - cp -r ${buddy-llvm} llvm-project - cp -r $src buddy-mlir - - # Directories copied from nix store are read only - chmod -R u+w llvm-project buddy-mlir - ''; - sourceRoot = "llvm-project"; - - nativeBuildInputs = [ cmake ninja python3 bintools ]; - prePatch = "pushd ../buddy-mlir"; - patches = [ - (fetchpatch { - url = "https://github.com/buddy-compiler/buddy-mlir/pull/357.patch"; - hash = "sha256-ysPcHAkrFJDtHmWVo35Wz6ullIGsP1EedYdJCq4fRX4="; - }) + nativeBuildInputs = [ cmake ninja bintools ]; + buildInputs = [ + buddy-llvm ]; - postPatch = "popd"; - cmakeDir = "../llvm"; cmakeFlags = [ + "-DMLIR_DIR=${buddy-llvm.dev}/lib/cmake/mlir" + "-DLLVM_DIR=${buddy-llvm.dev}/lib/cmake/llvm" + "-DLLVM_MAIN_SRC_DIR=${buddy-llvm.src}/llvm" + "-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON" "-DCMAKE_BUILD_TYPE=Release" - "-DLLVM_INSTALL_UTILS=ON" - "-DLLVM_ENABLE_PROJECTS=mlir" - "-DLLVM_TARGETS_TO_BUILD=host;RISCV" - "-DLLVM_ENABLE_ASSERTIONS=ON" - "-DLLVM_USE_LINKER=lld" - - "-DLLVM_EXTERNAL_PROJECTS=buddy-mlir" - "-DLLVM_EXTERNAL_BUDDY_MLIR_SOURCE_DIR=../../buddy-mlir" ]; - passthru.llvm = buddy-llvm; - # No need to do check, and it also takes too much time to finish.
doCheck = false; + + # Merge the LLVM and Buddy python modules into one directory for easier import + postFixup = '' + mkdir -p $out/lib/python${python3.pythonVersion}/site-packages + cp -vr $out/python_packages/buddy $out/lib/python${python3.pythonVersion}/site-packages/ + cp -vr ${buddy-llvm}/python_packages/mlir_core/mlir $out/lib/python${python3.pythonVersion}/site-packages/ + ''; + + passthru = { + llvm = buddy-llvm; + + # The three fields below are the black magic that lets nixpkgs' python hooks pick up these site-packages automatically + pythonModule = python3; + pythonPath = [ ]; + requiredPythonModules = [ ]; + }; } diff --git a/nix/pkgs/rvv-codegen.nix b/nix/pkgs/rvv-codegen.nix index 4e6d3ed7b..bffddec26 100644 --- a/nix/pkgs/rvv-codegen.nix +++ b/nix/pkgs/rvv-codegen.nix @@ -11,10 +11,10 @@ buildGoModule { pname = "riscv-vector-test"; version = "unstable-2023-04-12"; src = fetchFromGitHub { - owner = "ksco"; + owner = "chipsalliance"; repo = "riscv-vector-tests"; - rev = "bafa717d37b9bef3e80b66a50b01c22f532306bc"; - hash = "sha256-C91HUDyMykS3qM9h+rJ2uKAJcKHkoakw9I+wwtco0m8="; + rev = "caae5c8fcf465be73266f9b3bd672f71a362548e"; + hash = "sha256-388MKOO+g4PjR3BcxiA8vNY7itDcIhz88vZmMZkbsj8="; }; doCheck = false; vendorHash = "sha256-9cQlivpHg6IDYpmgBp34n6BR/I0FIYnmrXCuiGmAhNE="; diff --git a/nix/t1/_sources/generated.json b/nix/t1/_sources/generated.json index 20f84a5e5..26e124b0b 100644 --- a/nix/t1/_sources/generated.json +++ b/nix/t1/_sources/generated.json @@ -41,7 +41,7 @@ }, "chisel": { "cargoLocks": null, - "date": "2024-08-02", + "date": "2024-08-07", "extract": null, "name": "chisel", "passthru": null, @@ -53,11 +53,11 @@ "name": null, "owner": "chipsalliance", "repo": "chisel", - "rev": "557bc5064afb34124a39e9a3677f1e647306b3f6", - "sha256": "sha256-ZYk76WOd4OZrimiWRw6TV/QQ/zy3u9blwwqTAMRs5uk=", + "rev": "8572c28dbcc54a2f20ade462028ed50a2f3209b8", + "sha256": "sha256-j76XKUrUCI1g7fgcnaPWgU9KcI2a8T1jOSezOdRpkKI=", "type": "github" }, - "version": "557bc5064afb34124a39e9a3677f1e647306b3f6" + "version": "8572c28dbcc54a2f20ade462028ed50a2f3209b8" }, "chisel-interface": { "cargoLocks": null, diff --git a/nix/t1/_sources/generated.nix b/nix/t1/_sources/generated.nix index 2f4406549..f74d22375 100644 --- a/nix/t1/_sources/generated.nix +++ b/nix/t1/_sources/generated.nix @@ -27,15 +27,15 @@ }; chisel = { pname = "chisel"; - version = "557bc5064afb34124a39e9a3677f1e647306b3f6"; + version = "8572c28dbcc54a2f20ade462028ed50a2f3209b8"; src = fetchFromGitHub { owner = "chipsalliance"; repo = "chisel"; - rev = "557bc5064afb34124a39e9a3677f1e647306b3f6"; + rev = "8572c28dbcc54a2f20ade462028ed50a2f3209b8"; fetchSubmodules = false; - sha256 = "sha256-ZYk76WOd4OZrimiWRw6TV/QQ/zy3u9blwwqTAMRs5uk="; + sha256 = "sha256-j76XKUrUCI1g7fgcnaPWgU9KcI2a8T1jOSezOdRpkKI="; }; - date = "2024-08-02"; + date = "2024-08-07"; }; chisel-interface = { pname = "chisel-interface"; diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 088cd98ae..33bf22df7 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -35,6 +35,10 @@ lib.makeScope newScope configgen = _millOutput.configgen // { meta.mainProgram = "configgen"; }; t1package = _millOutput.t1package; + rocketv = self.callPackage ../../rocketemu { }; + + t1rocketemu = self.callPackage ../../t1rocketemu { }; + omreader-unwrapped = self.callPackage ./omreader.nix { }; submodules = self.callPackage ./submodules.nix { }; diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index 7e1d75680..c4efaada7 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ -29,6
+29,10 @@ let ./../../ipemu/src ./../../elaborator ./../../configgen/src + ./../../rocketv + ./../../t1rocket/src + ./../../t1rocketemu/src + ./../../rocketemu/src ]; }; @@ -61,6 +65,7 @@ let circt-full jextract-21 add-determinism + espresso makeWrapper passthru.millDeps.setupHook @@ -98,7 +103,7 @@ let mkdir -p $configgen/bin $elaborator/bin makeWrapper ${jdk21}/bin/java $configgen/bin/configgen --add-flags "-jar $out/share/java/configgen.jar" - makeWrapper ${jdk21}/bin/java $elaborator/bin/elaborator --add-flags "--enable-preview -Djava.library.path=${circt-full}/lib -jar $out/share/java/elaborator.jar" + makeWrapper ${jdk21}/bin/java $elaborator/bin/elaborator --add-flags "--enable-preview -Djava.library.path=${circt-full}/lib -cp $out/share/java/elaborator.jar org.chipsalliance.t1.elaborator.Main" ''; }; in diff --git a/rocketemu/.rustfmt.toml b/rocketemu/.rustfmt.toml new file mode 100644 index 000000000..7b6c82e24 --- /dev/null +++ b/rocketemu/.rustfmt.toml @@ -0,0 +1,4 @@ +hard_tabs = false +tab_spaces = 2 +chain_width = 100 +struct_lit_width = 50 \ No newline at end of file diff --git a/rocketemu/Cargo.lock b/rocketemu/Cargo.lock new file mode 100644 index 000000000..4f4c7afe7 --- /dev/null +++ b/rocketemu/Cargo.lock @@ -0,0 +1,634 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "spike_rs", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "driver" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "elf", + "hex", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libloading" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "memchr" +version = 
"2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "offline" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "common", + "libloading", + "num-bigint", + "serde", + "serde_json", + "spike_rs", + "tracing", + "tracing-subscriber", + "xmas-elf", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spike_rs" +version = "0.1.0" +dependencies = [ + "anyhow", + "libc", + "tracing", + "xmas-elf", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = 
"windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xmas-elf" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42c49817e78342f7f30a181573d82ff55b88a35f86ccaf07fc64b3008f56d1c6" +dependencies = [ + "zero", +] + +[[package]] +name = "zero" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fe21bcc34ca7fe6dd56cc2cb1261ea59d6b93620215aefb5ea6032265527784" diff --git a/rocketemu/Cargo.toml b/rocketemu/Cargo.toml new file mode 100644 index 000000000..b34153b2a --- /dev/null +++ b/rocketemu/Cargo.toml @@ -0,0 +1,23 @@ +[workspace] +resolver = "2" +members = [ + "test_common", + "spike_rs", + "offline", + "driver", +] +exclude = [ + "spike_interfaces" +] + +[workspace.package] +version = "0.1.0" + +[workspace.dependencies] +anyhow = "1.0.79" +clap = { version = "4.4.18", features = ["derive"] } +tracing = "0.1.40" +tracing-subscriber = { version = "0.3", features = ["env-filter", "ansi"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +num-bigint = "0.4.6" diff --git a/rocketemu/default.nix b/rocketemu/default.nix new file mode 100644 index 000000000..99d9e3a58 --- /dev/null +++ b/rocketemu/default.nix @@ -0,0 +1,76 @@ +{ lib +, newScope +, rustPlatform +, libspike +, zlib +}: +let + configsDirectory = ../rocketv/configs; + # allConfigs is a (configName -> configJsonPath) map + allConfigs = lib.mapAttrs' + (fileName: fileType: + assert fileType == "regular" && lib.hasSuffix ".json" fileName; + lib.nameValuePair + (lib.removeSuffix ".json" fileName) + (lib.path.append configsDirectory fileName)) + (builtins.readDir configsDirectory); +in +lib.mapAttrs + (configName: configPath: ( + lib.makeScope newScope (scope: rec { + rocket-config = configPath; + mlirbc = scope.callPackage ./nix/mlirbc.nix { }; + rtl = scope.callPackage ./nix/rtl.nix { }; + verilated-csrc = scope.callPackage ./nix/verilated-csrc.nix { }; + + c-dpi-lib = scope.callPackage ./dpi { }; + + # FIXME: merge with difftest and put it under the nix/pkgs + spike_interfaces = scope.callPackage ../difftest/spike_interfaces { }; + + emu = rustPlatform.buildRustPackage { + name = "rocketemu"; + + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./driver + ./offline + ./spike_rs + ./test_common + ./Cargo.lock + ./Cargo.toml + ]; + }; + + buildInputs = [ + zlib + spike_interfaces + ]; + + env = + let + toLib = drv: "${drv}/lib"; + in + { + ROCKET_DPI_DIR = toLib c-dpi-lib; + TESTBENCH_LIB_DIR = toLib verilated-csrc; + SPIKE_LIB_DIR = toLib libspike; + SPIKE_INTERFACES_LIB_DIR = toLib spike_interfaces; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + outputs = [ "out" "driver" "offline" ]; + + postInstall = '' + mkdir -p $driver/bin $offline/bin + ln -s $out/bin/driver $driver/bin/driver + ln -s $out/bin/offline $driver/bin/offline + ''; + }; + }) + )) # end of mapAttr + allConfigs diff --git a/rocketemu/dpi/CMakeLists.txt b/rocketemu/dpi/CMakeLists.txt new file mode 100644 index 000000000..7579134b2 --- /dev/null +++ b/rocketemu/dpi/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.20) +project(rocket_dpi_c) 
diff --git a/rocketemu/dpi/CMakeLists.txt b/rocketemu/dpi/CMakeLists.txt new file mode 100644 index 000000000..7579134b2 --- /dev/null +++ b/rocketemu/dpi/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.20) +project(rocket_dpi_c) +set(CMAKE_CXX_STANDARD 17) + +message(STATUS "Project '${PROJECT_NAME}' build type: ${CMAKE_BUILD_TYPE}") + +set(THREADS_PREFER_PTHREAD_FLAG ON) + +add_library(dpi + STATIC + dpi.cc +) + +add_library(dpi_pre_link + STATIC + dpi_pre_link.cc +) + +if (NOT DEFINED VERILATED_LIB_DIR) + set(VERILATED_LIB_DIR "$ENV{VERILATED_LIB_DIR}") + if (VERILATED_LIB_DIR STREQUAL "") + message(FATAL_ERROR "Verilated libraries must be specified via -DVERILATED_LIB_DIR or the environment variable VERILATED_LIB_DIR, but neither is set") + endif() +endif() + +if (NOT DEFINED VERILATED_INC_DIR) + set(VERILATED_INC_DIR "$ENV{VERILATED_INC_DIR}") + if (VERILATED_INC_DIR STREQUAL "") + message(FATAL_ERROR "Verilated headers must be specified via -DVERILATED_INC_DIR or the environment variable VERILATED_INC_DIR, but neither is set") + endif() +endif() + +# include verilated headers +target_include_directories(dpi PUBLIC ${VERILATED_INC_DIR}) +target_include_directories(dpi PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(dpi_pre_link PUBLIC ${VERILATED_INC_DIR}) +target_include_directories(dpi_pre_link PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +# include verilator headers +find_package(verilator REQUIRED) +message(STATUS "Found verilator: ${verilator_DIR}") +target_include_directories(dpi PUBLIC ${verilator_DIR}/include) +target_include_directories(dpi PUBLIC ${verilator_DIR}/include/vltstd) +target_include_directories(dpi_pre_link PUBLIC ${verilator_DIR}/include) +target_include_directories(dpi_pre_link PUBLIC ${verilator_DIR}/include/vltstd) + +if(DEFINED VM_TRACE) + target_compile_definitions(dpi PRIVATE VM_TRACE=1) + target_compile_definitions(dpi_pre_link PRIVATE VM_TRACE=1) +endif() + +install(TARGETS dpi dpi_pre_link ARCHIVE) diff --git a/rocketemu/dpi/default.nix b/rocketemu/dpi/default.nix new file mode 100644 index 000000000..6c4e1faff --- /dev/null +++ b/rocketemu/dpi/default.nix @@ -0,0 +1,27 @@ +{ lib +, verilator +, stdenv +, cmake +, ninja +, verilated-csrc +}: +stdenv.mkDerivation { + name = "rocketv-emulator"; + + src = ./.; + + nativeBuildInputs = [ + cmake + ninja + verilator + ]; + + cmakeFlags = lib.optionals verilated-csrc.enable-trace [ + "-DVM_TRACE=ON" + ]; + + env = { + VERILATED_INC_DIR = "${verilated-csrc}/include"; + VERILATED_LIB_DIR = "${verilated-csrc}/lib"; + }; +}
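The dpi/default.nix derivation above is a thin CMake driver: it points the two VERILATED_* variables at the include/ and lib/ outputs of the verilated-csrc derivation and toggles VM_TRACE. For debugging outside of Nix, an equivalent hand-run configure step might look like this; the paths are placeholders and not part of this PR:

    # hypothetical manual build of the two DPI static libraries
    export VERILATED_INC_DIR=/path/to/verilated-csrc/include
    export VERILATED_LIB_DIR=/path/to/verilated-csrc/lib
    cmake -G Ninja -B build rocketemu/dpi -DVM_TRACE=ON
    ninja -C build dpi dpi_pre_link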
diff --git a/rocketemu/dpi/dpi.cc b/rocketemu/dpi/dpi.cc new file mode 100644 index 000000000..c1f6e403b --- /dev/null +++ b/rocketemu/dpi/dpi.cc @@ -0,0 +1,64 @@ +// This file includes DPI call implementations + +#include "svdpi.h" + +#include "dpi.h" + +extern "C" { + +void *dpi_call_target; + +/// evaluate after AW and W is finished at corresponding channel_id. +void axi_write_loadStoreAXI(long long channel_id, long long awid, + long long awaddr, long long awlen, long long awsize, + long long awburst, long long awlock, + long long awcache, long long awprot, + long long awqos, long long awregion, + /// struct packed {bit [255:0][DLEN:0] data; + /// bit [255:0][DLEN/8:0] strb; } payload + const svBitVecVal *payload) { + axi_write_loadStoreAXI_rs(dpi_call_target, channel_id, awid, awaddr, awlen, + awsize, awburst, awlock, awcache, awprot, awqos, + awregion, payload); +}; + +/// evaluate at AR fire at corresponding channel_id. +void axi_read_loadStoreAXI( + long long channel_id, long long arid, long long araddr, long long arlen, + long long arsize, long long arburst, long long arlock, long long arcache, + long long arprot, long long arqos, long long arregion, + /// struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + svBitVecVal *payload) { + axi_read_loadStoreAXI_rs(dpi_call_target, channel_id, arid, araddr, arlen, + arsize, arburst, arlock, arcache, arprot, arqos, + arregion, payload); +}; + +/// evaluate at AR fire at corresponding channel_id. +void axi_read_instructionFetchAXI( + long long channel_id, long long arid, long long araddr, long long arlen, + long long arsize, long long arburst, long long arlock, long long arcache, + long long arprot, long long arqos, long long arregion, + /// struct packed {bit [255:0][31:0] data; byte beats; } payload + svBitVecVal *payload) { + axi_read_instructionFetchAXI_rs(dpi_call_target, channel_id, arid, araddr, + arlen, arsize, arburst, arlock, arcache, + arprot, arqos, arregion, payload); +}; + +/// evaluate after reset; will only be called once, returning the dpi call +/// target. +void cosim_init() { dpi_call_target = cosim_init_rs(); } + +/// dynamically fetch the reset vector (the ELF entry point) from the +/// simulator. +void get_resetvector(long long *resetvector) { + get_resetvector_rs(dpi_call_target, resetvector); +} + +/// evaluate at every 1024 cycles, return reason = 0 to continue simulation, +/// other value is used as error code. +void cosim_watchdog(char *reason) { + cosim_watchdog_rs(dpi_call_target, reason); +} + +} // extern "C"
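For orientation: these C shims are what the generated SystemVerilog testbench imports via DPI, and the `_rs` symbols they forward to are defined in rocketemu/driver/src/dpi.rs later in this diff. A hedged sketch of the SV side, which is produced by the Chisel/firtool flow and is not itself part of this PR, so the exact declarations may differ:

    // hypothetical SystemVerilog declarations matching the C symbols above
    import "DPI-C" function void cosim_init();
    import "DPI-C" function void get_resetvector(output longint resetvector);
    import "DPI-C" function void cosim_watchdog(output byte reason);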
diff --git a/rocketemu/dpi/dpi.h b/rocketemu/dpi/dpi.h new file mode 100644 index 000000000..6f38b1639 --- /dev/null +++ b/rocketemu/dpi/dpi.h @@ -0,0 +1,55 @@ +// This file includes DPI calls to be implemented in Rust + +#pragma once + +#include "svdpi.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void *dpi_call_target; + +/// evaluate after AW and W is finished at corresponding channel_id. +extern void +axi_write_loadStoreAXI_rs(void *dpi_call_target, long long channel_id, + long long awid, long long awaddr, long long awlen, + long long awsize, long long awburst, long long awlock, + long long awcache, long long awprot, long long awqos, + long long awregion, + /// struct packed {bit [255:0][DLEN:0] data; bit + /// [255:0][DLEN/8:0] strb; } payload + const svBitVecVal *payload); + +/// evaluate at AR fire at corresponding channel_id. +extern void axi_read_loadStoreAXI_rs( + void *dpi_call_target, long long channel_id, long long arid, + long long araddr, long long arlen, long long arsize, long long arburst, + long long arlock, long long arcache, long long arprot, long long arqos, + long long arregion, + /// struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + svBitVecVal *payload); + +/// evaluate at AR fire at corresponding channel_id. +extern void axi_read_instructionFetchAXI_rs( + void *dpi_call_target, long long channel_id, long long arid, + long long araddr, long long arlen, long long arsize, long long arburst, + long long arlock, long long arcache, long long arprot, long long arqos, + long long arregion, + /// struct packed {bit [255:0][31:0] data; byte beats; } payload + svBitVecVal *payload); + +/// evaluate after reset; will only be called once, setting *call_init = true +/// and returning the dpi call target. +extern void *cosim_init_rs(); + +/// evaluate after reset, return the reset vector +extern void get_resetvector_rs(void *dpi_call_target, long long *resetvector); + +/// evaluate at every 1024 cycles, return reason = 0 to continue simulation, +/// other value is used as error code. +extern void cosim_watchdog_rs(void *dpi_call_target, char *reason); + +#ifdef __cplusplus +} +#endif diff --git a/rocketemu/dpi/dpi_pre_link.cc b/rocketemu/dpi/dpi_pre_link.cc new file mode 100644 index 000000000..e35b4dccd --- /dev/null +++ b/rocketemu/dpi/dpi_pre_link.cc @@ -0,0 +1,67 @@ +#include <VTestBench.h> +#include <verilated.h> + +#include "dpi_pre_link.h" + +class VTestBench; + +VerilatedContext *contextp; +VTestBench *topp; + +bool quit; + +void quit_c() { + quit = true; +} + +int verilator_main_c(int argc, char **argv) { + // Setup context, defaults, and parse command line + Verilated::debug(0); + contextp = new VerilatedContext(); + contextp->fatalOnError(false); + contextp->commandArgs(argc, argv); + + // Set quit flag, true means quit + quit = false; + + // Construct the Verilated model, from Vtop.h generated from Verilating + topp = new VTestBench(contextp); + + // Simulate until $finish + while (!contextp->gotFinish() && !quit) { + // Evaluate model + topp->eval(); + // Advance time + if (!topp->eventsPending()) + break; + contextp->time(topp->nextTimeSlot()); + } + + if (!contextp->gotFinish()) { + VL_DEBUG_IF(VL_PRINTF("+ Exiting without $finish; no events left\n");); + } + + // Final model cleanup + topp->final(); + + delete topp; + delete contextp; + + return 0; +} + +#ifdef VM_TRACE +void dump_wave_c(char *path) { + Verilated::traceEverOn(true); + svSetScope(svGetScopeFromName("TOP.TestBench.clockGen")); + dump_wave(path); +} +#endif + +uint64_t get_t_c() { + if (contextp) { + return contextp->time(); + } else { // before ctx is initialized + return 0; + } +} diff --git a/rocketemu/dpi/dpi_pre_link.h b/rocketemu/dpi/dpi_pre_link.h new file mode 100644 index 000000000..94d8bd51a --- /dev/null +++ b/rocketemu/dpi/dpi_pre_link.h @@ -0,0 +1,26 @@ +// This header provides several functions to be used in Rust +// +// dpi_pre_link should be linked before libverilated.so because it +// uses symbols in libverilated.so + +#pragma once + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int verilator_main_c(int argc, char **argv); + +void quit_c(); + +#ifdef VM_TRACE +void dump_wave_c(char *path); +#endif + +uint64_t get_t_c(); + +#ifdef __cplusplus +} +#endif diff --git a/rocketemu/driver/.rustfmt.toml b/rocketemu/driver/.rustfmt.toml new file mode 100644 index 000000000..bf1a32fd3 --- /dev/null +++ b/rocketemu/driver/.rustfmt.toml @@ -0,0 +1,4 @@ +hard_tabs = false +tab_spaces = 2 +chain_width = 100 +struct_lit_width = 50 diff --git a/rocketemu/driver/Cargo.lock b/rocketemu/driver/Cargo.lock new file mode 100644 index 000000000..2e745aead --- /dev/null +++ b/rocketemu/driver/Cargo.lock @@ -0,0 +1,488 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing.
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "driver" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "elf", + "hex", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "heck" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.4", +] + +[[package]] +name 
= "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/rocketemu/driver/Cargo.toml b/rocketemu/driver/Cargo.toml new file mode 100644 index 000000000..bc0e7bec4 --- /dev/null +++ b/rocketemu/driver/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "driver" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } + +elf = 
"0.7.4" +hex = "0.4.3" + +[features] +trace = [] diff --git a/rocketemu/driver/build.rs b/rocketemu/driver/build.rs new file mode 100644 index 000000000..748eeea21 --- /dev/null +++ b/rocketemu/driver/build.rs @@ -0,0 +1,20 @@ +fn main() { + const SEARCH_DIRS: [&str; 2] = ["ROCKET_DPI_DIR", "TESTBENCH_LIB_DIR"]; + SEARCH_DIRS.iter().for_each(|env| { + let dir = + std::env::var(env).unwrap_or_else(|_| panic!("ERROR: {} environment variable not set", &env)); + println!("cargo:rustc-link-search=native={}", &dir); + println!("cargo:rerun-if-env-changed={}", env); + }); + + // link order matters! + // verilator_main <- VTestBench <-- verilated <- dpi_c <- stdc++ + // verilated <- libz + // that's why we must split verilator_main and dpi_c + println!("cargo:rustc-link-lib=static=dpi_pre_link"); + println!("cargo:rustc-link-lib=static=VTestBench"); + println!("cargo:rustc-link-lib=static=verilated"); + println!("cargo:rustc-link-lib=static=dpi"); + println!("cargo:rustc-link-lib=static=stdc++"); + println!("cargo:rustc-link-lib=dylib=z"); +} diff --git a/rocketemu/driver/default.nix b/rocketemu/driver/default.nix new file mode 100644 index 000000000..88134307a --- /dev/null +++ b/rocketemu/driver/default.nix @@ -0,0 +1,33 @@ +{ rustPlatform +, c-dpi-lib +, rocketv-verilated-csrc +, zlib +, rust-analyzer +, rustfmt +}: +let + self = rustPlatform.buildRustPackage { + name = "rocket-driver"; + + src = ./.; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + buildInputs = [ zlib ]; + + env = { + ROCKET_DPI_DIR = toString c-dpi-lib; + TESTBENCH_LIB_DIR = toString rocketv-verilated-csrc; + }; + + passthru.devShell = self.overrideAttrs (old: { + nativeBuildInputs = old.nativeBuildInputs ++ [ + rust-analyzer + rustfmt + ]; + }); + }; +in +self diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs new file mode 100644 index 000000000..48ec129ac --- /dev/null +++ b/rocketemu/driver/src/dpi.rs @@ -0,0 +1,237 @@ +#![allow(non_snake_case)] +#![allow(unused_variables)] + +use clap::Parser; +use std::ffi::{c_char, c_int, c_longlong, CString}; +use std::ptr; +use tracing::debug; + +use crate::sim::{SimulationArgs, Simulator}; + +pub type SvScalar = u8; +pub type SvBit = SvScalar; +pub type SvBitVecVal = u32; + +// -------------------------- +// preparing data structures +// -------------------------- + +///! Read 2^aw_size from *payload, and split it at dlen/16. +///! +///! 
diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs new file mode 100644 index 000000000..48ec129ac --- /dev/null +++ b/rocketemu/driver/src/dpi.rs @@ -0,0 +1,237 @@ +#![allow(non_snake_case)] +#![allow(unused_variables)] + +use clap::Parser; +use std::ffi::{c_char, c_int, c_longlong, CString}; +use std::ptr; +use tracing::debug; + +use crate::sim::{SimulationArgs, Simulator}; + +pub type SvScalar = u8; +pub type SvBit = SvScalar; +pub type SvBitVecVal = u32; + +// -------------------------- +// preparing data structures +// -------------------------- + +/// Read 2^aw_size bytes from *payload, and split it at dlen/16. +/// +/// Return (strobe in bits, data in bytes) +unsafe fn load_from_payload( + payload: &*const SvBitVecVal, + aw_size: c_longlong, + data_width: usize, + dlen: usize, +) -> (Vec<bool>, &[u8]) { + let src = *payload as *mut u8; + let data_width_in_byte = dlen / 8; + let strb_width_in_byte = dlen / data_width; + let payload_size_in_byte = strb_width_in_byte + data_width_in_byte; // strobe bytes + data bytes + let byte_vec = std::slice::from_raw_parts(src, payload_size_in_byte); + let strobe = &byte_vec[0..strb_width_in_byte]; + let data = &byte_vec[strb_width_in_byte..]; + + let strb_width_in_bit = data_width / 8; + let masks: Vec<bool> = strobe + .into_iter() + .flat_map(|strb| { + let mask: Vec<bool> = (0..strb_width_in_bit).map(|i| (strb & (1 << i)) != 0).collect(); + mask + }) + .collect(); + assert!( + masks.len() == data.len(), + "strobe bit width is not aligned with data byte width" + ); + + debug!( + "load {payload_size_in_byte} byte from payload: raw_data={} strb={} data={}", + hex::encode(byte_vec), + hex::encode(strobe), + hex::encode(data), + ); + + (masks, data) +} + +fn write_to_pointer(dst: *mut u8, data: &[u8], n: usize) { + unsafe { + for i in 0..n { + ptr::write(dst.add(i), data[i]); + } + } +} + +unsafe fn fill_axi_read_payload(dst: *mut SvBitVecVal, dlen: u32, data: &[u8]) { + let data_len = (256 / 8) * dlen as usize; + assert!(data.len() <= data_len); + let dst = dst as *mut u8; + write_to_pointer(dst, data, data.len()); +} + +//---------------------- +// dpi functions +//---------------------- + +#[no_mangle] +unsafe extern "C" fn axi_write_loadStoreAXI_rs( + target: *mut (), + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_loadStoreAXI (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize=2^{awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + + let sim = &mut *(target as *mut Simulator); + let data_width = 32; // TODO: get from sim + let (strobe, data) = load_from_payload(&payload, 1 << awsize, data_width, sim.dlen as usize); + sim.axi_write(awaddr as u32, &strobe, data); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_loadStoreAXI_rs( + target: *mut (), + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_loadStoreAXI (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let sim = &mut *(target as *mut Simulator); + let response = sim.axi_read(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, sim.dlen, &response.data); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_instructionFetchAXI_rs( + target: *mut (), + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_instructionFetchAXI
(channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let sim = &mut *(target as *mut Simulator); + let response = sim.axi_read(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, sim.dlen, &response.data); +} + +#[no_mangle] +unsafe extern "C" fn cosim_init_rs(call_init: *mut SvBit) -> *mut () { + let args = SimulationArgs::parse(); + *call_init = 1; + let sim = Box::new(Simulator::new(args)); + Box::into_raw(sim) as *mut () +} + +#[no_mangle] +unsafe extern "C" fn get_resetvector_rs(target: *mut (), resetvector: *mut c_longlong) { + if !target.is_null() { + let sim = &mut *(target as *mut Simulator); + *resetvector = sim.e_entry as c_longlong + } +} + +#[no_mangle] +unsafe extern "C" fn cosim_watchdog_rs(target: *mut (), reason: *mut c_char) { + // the watchdog dpi call may arrive before initialization, so guard on a null target + if !target.is_null() { + let sim = &mut *(target as *mut Simulator); + *reason = sim.watchdog() as c_char + } +} + +//-------------------------------- +// import functions and wrappers +//-------------------------------- + +#[link(name = "dpi_pre_link")] +extern "C" { + fn verilator_main_c(argc: c_int, argv: *mut *mut c_char) -> c_int; + + fn quit_c(); + + #[cfg(feature = "trace")] + fn dump_wave_c(path: *const c_char); + + fn get_t_c() -> u64; +} + +// FIXME: currently we use the verilator contextp time as the simulation time, +// but we should instead read the cycle count at the TestBench top. +pub(crate) fn get_t() -> u64 { + unsafe { get_t_c() / 20 } +} + +pub(crate) fn quit() { + unsafe { + quit_c(); + } +} + +pub(crate) fn verilator_main() { + // NOTE: the CStrings must stay alive until verilator_main_c returns, + // otherwise argv would point at freed memory. + let c_args: Vec<CString> = + std::env::args().map(|arg| CString::new(arg).unwrap()).collect(); + + let mut c_args_ptr: Vec<*mut c_char> = + c_args.iter().map(|arg| arg.as_ptr() as *mut c_char).collect(); + + c_args_ptr.push(ptr::null_mut()); + + let argc = c_args.len() as c_int; + + let argv = c_args_ptr.as_mut_ptr(); + + unsafe { + verilator_main_c(argc, argv); + } +} + +#[cfg(feature = "trace")] +pub(crate) fn dump_wave(path: &str) { + let path_cstring = CString::new(path).unwrap(); + let path_ptr: *const c_char = path_cstring.as_ptr(); + unsafe { + dump_wave_c(path_ptr); + } +} diff --git a/rocketemu/driver/src/main.rs b/rocketemu/driver/src/main.rs new file mode 100644 index 000000000..579e89c6a --- /dev/null +++ b/rocketemu/driver/src/main.rs @@ -0,0 +1,6 @@ +mod dpi; +mod sim; + +fn main() { + dpi::verilator_main(); +}
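Note the exit protocol wired through `axi_write` in sim.rs below: simulation ends when the guest program stores the magic value 0xdead_beef to address 0x4000_0000. A hypothetical bare-metal, guest-side stub (not part of this PR) makes the convention concrete:

    /* hypothetical guest code; constants mirror EXIT_POS/EXIT_CODE in sim.rs */
    #define SIM_EXIT_ADDR 0x40000000u
    #define SIM_EXIT_CODE 0xdeadbeefu

    static inline void sim_exit(void) {
      *(volatile unsigned int *)SIM_EXIT_ADDR = SIM_EXIT_CODE;
    }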
diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs new file mode 100644 index 000000000..0420a7f8c --- /dev/null +++ b/rocketemu/driver/src/sim.rs @@ -0,0 +1,331 @@ +#[cfg(feature = "trace")] +use crate::dpi::dump_wave; +use crate::dpi::get_t; +use crate::dpi::quit; + +use clap::{arg, Parser}; +use std::collections::HashMap; +use std::os::unix::fs::FileExt; +use std::{ + fs, + path::{Path, PathBuf}, +}; +use tracing::{debug, error, info, trace}; + +use anyhow::Context; +use elf::abi::STT_FUNC; +use elf::{ + abi::{EM_RISCV, ET_EXEC, PT_LOAD}, + endian::LittleEndian, + ElfStream, +}; + +pub(crate) struct AxiReadPayload { + pub(crate) data: Vec<u8>, +} + +const EXIT_POS: u32 = 0x4000_0000; +const EXIT_CODE: u32 = 0xdead_beef; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct SimulationArgs { + /// Path to the ELF file + #[arg(long)] + pub elf_file: PathBuf, + + /// Path to the log file + #[arg(long)] + pub log_file: Option<PathBuf>, + + /// Log level: trace, debug, info, warn, error + #[arg(long, default_value = "info")] + pub log_level: String, + + /// The timeout value + #[arg(long, default_value_t = 10_000)] + pub timeout: u64, + + #[cfg(feature = "trace")] + #[arg(long)] + pub wave_path: String, + + #[cfg(feature = "trace")] + #[arg(long, default_value = "")] + pub dump_range: String, +} + +impl SimulationArgs { + #[cfg(feature = "trace")] + fn parse_range(&self) -> (u64, u64) { + let input = &self.dump_range; + + if input.is_empty() { + return (0, 0); + } + + let parts: Vec<&str> = input.split(",").collect(); + + if parts.len() != 1 && parts.len() != 2 { + error!("invalid dump wave range: `{input}` was given"); + return (0, 0); + } + + const INVALID_NUMBER: &'static str = "invalid number"; + + if parts.len() == 1 { + return (parts[0].parse().expect(INVALID_NUMBER), 0); + } + + if parts[0].is_empty() { + return (0, parts[1].parse().expect(INVALID_NUMBER)); + } + + let start = parts[0].parse().expect(INVALID_NUMBER); + let end = parts[1].parse().expect(INVALID_NUMBER); + if start > end { + panic!("dump start is larger than end: `{input}`"); + } + + (start, end) + } +} + +// FIXME: fix FunctionSym +#[derive(Debug)] +#[allow(dead_code)] +pub struct FunctionSym { + #[allow(dead_code)] + pub(crate) name: String, + #[allow(dead_code)] + pub(crate) info: u8, +} +pub type FunctionSymTab = HashMap<u64, FunctionSym>; + +// NOTE: make it configurable from cmd line? +const SIM_MEM_SIZE: usize = 1usize << 32; + +#[derive(Debug)] +pub struct Simulator { + pub(crate) mem: Vec<u8>, + #[allow(dead_code)] + pub(crate) fn_sym_tab: FunctionSymTab, + pub(crate) dlen: u32, + pub(crate) timeout: u64, + pub(crate) e_entry: u64, + + #[cfg(feature = "trace")] + wave_path: String, + #[cfg(feature = "trace")] + dump_start: u64, + #[cfg(feature = "trace")] + dump_end: u64, + #[cfg(feature = "trace")] + dump_started: bool, +} + +pub static WATCHDOG_CONTINUE: u8 = 0; +pub static WATCHDOG_TIMEOUT: u8 = 1; + +impl Simulator { + pub fn new(args: SimulationArgs) -> Self { + let log_level: tracing::Level = args.log_level.parse().expect("fail to parse log level"); + let global_logger = tracing_subscriber::FmtSubscriber::builder() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_max_level(log_level) + .without_time() + .with_target(false) + .with_ansi(true) + .compact() + .finish(); + tracing::subscriber::set_global_default(global_logger) + .expect("internal error: fail to setup log subscriber"); + + // pass e_entry to rocket + let (e_entry, mem, fn_sym_tab) = + Self::load_elf(&args.elf_file).expect("fail creating simulator"); + + #[cfg(feature = "trace")] + let (dump_start, dump_end) = args.parse_range(); + + Self { + mem, + fn_sym_tab, + timeout: args.timeout, + dlen: option_env!("DESIGN_DLEN") + .map(|dlen| dlen.parse().expect("fail to parse dlen into u32")) + .unwrap_or(256), + e_entry, + + #[cfg(feature = "trace")] + wave_path: args.wave_path.to_owned(), + #[cfg(feature = "trace")] + dump_start, + #[cfg(feature = "trace")] + dump_end, + #[cfg(feature = "trace")] + dump_started: false, + } + }
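`SimulationArgs` above is the whole driver CLI; clap derives kebab-case flags from the field names. A hypothetical invocation, with placeholder paths and values:

    # basic run
    ./driver --elf-file ./tests/hello.elf --log-level debug --timeout 100000
    # with the `trace` feature compiled in, wave dumping can be windowed
    ./driver --elf-file ./tests/hello.elf --wave-path wave.fst --dump-range 1000,2000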
+ + // FIXME: In the current implementation, all the ELF sections are read without considering byte order. + // We might want to take care of that with ntohl-style conversions into host byte order. + // The *elf* crate hopefully handles this for us, but I haven't investigated further yet. (assign to @Avimitin) + pub fn load_elf(path: &Path) -> anyhow::Result<(u64, Vec<u8>, FunctionSymTab)> { + let file = fs::File::open(path).with_context(|| "reading ELF file")?; + let mut elf: ElfStream<LittleEndian, _> = + ElfStream::open_stream(&file).with_context(|| "parsing ELF file")?; + + if elf.ehdr.e_machine != EM_RISCV { + anyhow::bail!("ELF is not RISC-V"); + } + + if elf.ehdr.e_type != ET_EXEC { + anyhow::bail!("ELF is not an executable"); + } + + if elf.ehdr.e_phnum == 0 { + anyhow::bail!("ELF has an empty program header table"); + } + + debug!("ELF entry: 0x{:x}", elf.ehdr.e_entry); + // FIXME: + // 1. If we use a reduce/map, collecting partial memory segments into one big buffer + // instead of mutating memory in place, does it affect runtime overhead? + // Does rustc help us optimize this operation? + // 2. The default ProgramHeader uses u64 for Elf32_phdr and Elf64_phdr; can we optimize this, or + // just let it go. + let mut mem: Vec<u8> = vec![0; SIM_MEM_SIZE]; + elf.segments().iter().filter(|phdr| phdr.p_type == PT_LOAD).for_each(|phdr| { + let vaddr: usize = phdr.p_vaddr.try_into().expect("fail converting vaddr(u64) to usize"); + let filesz: usize = phdr.p_filesz.try_into().expect("fail converting p_filesz(u64) to usize"); + debug!( + "Reading loadable segment 0x{:x}..0x{:x} into memory at 0x{:x}", + phdr.p_offset, + phdr.p_offset + filesz as u64, + vaddr + ); + // Load the file starting from `offset` into the given mem slice. + // The `offset` of the read_at method is relative to the start of the file and thus independent from the current cursor. + file.read_at(&mut mem[vaddr..vaddr + filesz], phdr.p_offset).unwrap_or_else(|err| { + panic!( + "fail reading ELF into mem with vaddr={}, filesz={}, offset={}. Error detail: {}", + vaddr, filesz, phdr.p_offset, err + ) + }); + }); + + // FIXME: the symbol table doesn't contain any function value yet + let mut fn_sym_tab = FunctionSymTab::new(); + let symbol_table = + elf.symbol_table().with_context(|| "reading symbol table(SHT_SYMTAB) from ELF")?; + if let Some((parsed_table, string_table)) = symbol_table { + parsed_table + .iter() + // st_symtype = symbol.st_info & 0xf (But why masking here?)
+ .filter(|sym| sym.st_symtype() == STT_FUNC) + .for_each(|sym| { + let name = string_table + .get(sym.st_name as usize) + .unwrap_or_else(|_| panic!("fail to get name at st_name={}", sym.st_name)); + fn_sym_tab.insert( + sym.st_value, + FunctionSym { name: name.to_string(), info: sym.st_symtype() }, + ); + }); + } else { + debug!("load_elf: symtab not found"); + }; + + Ok((elf.ehdr.e_entry, mem, fn_sym_tab)) + } + + fn write_mem(&mut self, addr: u32, alignment_bytes: u32, masks: &[bool], data: &[u8]) { + // early return on an all-zero strobe write + if !masks.iter().any(|&x| x) { + return; + } + let size = data.len() as u32; + // debug!("[{}] write_mem: size={size}, addr={addr:#x}", get_t()); + assert!( + (addr % size == 0 || addr % alignment_bytes == 0) && size >= alignment_bytes, + "unaligned write access addr={addr} size={size}bytes dlen={alignment_bytes}bytes" + ); + + masks.iter().enumerate().filter(|(_, &m)| m).for_each(|(i, _)| { + self.mem[addr as usize + i] = data[i]; + }); + } + + pub fn axi_write(&mut self, addr: u32, strobe: &[bool], data: &[u8]) { + // panic on misaligned mask and data sizes + assert_eq!( + strobe.len(), + data.len(), + "[{}] axi_write: strobe size is not equal to data size", + get_t() + ); + let data_hex = hex::encode(data); + info!("[{}] axi_write (addr={addr:#x}, data={data_hex})", get_t()); + + if addr == EXIT_POS && data.len() >= 4 { + let exit_code = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + if exit_code == EXIT_CODE { + info!("exit with code: {:x?}", exit_code); + quit(); + return; + } + } + + self.write_mem(addr, self.dlen / 8, strobe, data); + } + + fn read_mem(&mut self, addr: u32, size: u32) -> Vec<u8> { + assert!( + addr % size == 0, + "unaligned access addr={addr} size={size}bytes" + ); + // debug!("[{}] read_mem: size={size}, addr={addr:#x}", get_t()); + + (0..size).map(|i| self.mem[(addr + i) as usize]).collect() + } + + pub fn axi_read(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; // size in bytes + let data = self.read_mem(addr, size); + let data_hex = hex::encode(&data); + info!( + "[{}] axi_read (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn watchdog(&mut self) -> u8 { + let tick = get_t(); + if tick > self.timeout { + error!("[{}] watchdog timeout", get_t()); + WATCHDOG_TIMEOUT + } else { + #[cfg(feature = "trace")] + if self.dump_end != 0 && tick > self.dump_end { + info!("[{tick}] reached dump end, exiting"); + return WATCHDOG_TIMEOUT; + } + + #[cfg(feature = "trace")] + if !self.dump_started && tick >= self.dump_start { + self.start_dump_wave(); + self.dump_started = true; + } + + trace!("[{}] watchdog continue", get_t()); + WATCHDOG_CONTINUE + } + } + + #[cfg(feature = "trace")] + fn start_dump_wave(&mut self) { + dump_wave(&self.wave_path); + } +} diff --git a/rocketemu/nix/mlirbc.nix b/rocketemu/nix/mlirbc.nix new file mode 100644 index 000000000..65a25e503 --- /dev/null +++ b/rocketemu/nix/mlirbc.nix @@ -0,0 +1,23 @@ +{ stdenvNoCC + +, espresso +, circt + +, elaborator +, rocket-config +}: +stdenvNoCC.mkDerivation { + name = "t1-rocketv-elaborated.mlirbc"; + + nativeBuildInputs = [ elaborator espresso circt ]; + + buildCommand = '' + mkdir elaborate + elaborator rocketemu --target-dir elaborate --rocket-config ${rocket-config} + firtool elaborate/*.fir \ + --annotation-file elaborate/*.anno.json \ + --emit-bytecode \ + --parse-only \ + -o $out + ''; +}
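The elaboration pipeline is split across this derivation and rtl.nix below: the first firtool pass only parses the elaborated FIR into MLIR bytecode, and the second lowers that bytecode to split Verilog. Spelled out as plain commands, with file names standing in for the derivation outputs:

    # stage 1 (mlirbc.nix): FIR -> MLIR bytecode, parse only
    firtool elaborate/*.fir --annotation-file elaborate/*.anno.json \
      --emit-bytecode --parse-only -o design.mlirbc
    # stage 2 (rtl.nix): bytecode -> split Verilog
    firtool design.mlirbc -O=debug --split-verilog --preserve-values=named \
      --lowering-options=verifLabels,omitVersionComment --strip-debug-info -o rtl/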
diff --git a/rocketemu/nix/mlirbc.nix b/rocketemu/nix/mlirbc.nix new file mode 100644 index 000000000..65a25e503 --- /dev/null +++ b/rocketemu/nix/mlirbc.nix @@ -0,0 +1,23 @@ +{ stdenvNoCC + +, espresso +, circt + +, elaborator +, rocket-config +}: +stdenvNoCC.mkDerivation { + name = "t1-rocketv-elaborated.mlirbc"; + + nativeBuildInputs = [ elaborator espresso circt ]; + + buildCommand = '' + mkdir elaborate + elaborator rocketemu --target-dir elaborate --rocket-config ${rocket-config} + firtool elaborate/*.fir \ + --annotation-file elaborate/*.anno.json \ + --emit-bytecode \ + --parse-only \ + -o $out + ''; +} diff --git a/rocketemu/nix/rtl.nix b/rocketemu/nix/rtl.nix new file mode 100644 index 000000000..053078f82 --- /dev/null +++ b/rocketemu/nix/rtl.nix @@ -0,0 +1,26 @@ +{ stdenvNoCC +, lib + +, circt +, mlirbc +}: + +let + mfcArgs = lib.escapeShellArgs [ + "-O=debug" + "--split-verilog" + "--preserve-values=named" + "--lowering-options=verifLabels,omitVersionComment" + "--strip-debug-info" + ]; +in +stdenvNoCC.mkDerivation { + name = "t1-rocketv-rtl"; + nativeBuildInputs = [ circt ]; + + buildCommand = '' + mkdir -p $out + + firtool ${mlirbc} ${mfcArgs} -o $out + ''; +} diff --git a/rocketemu/nix/verilated-csrc.nix b/rocketemu/nix/verilated-csrc.nix new file mode 100644 index 000000000..f32ade7af --- /dev/null +++ b/rocketemu/nix/verilated-csrc.nix @@ -0,0 +1,78 @@ +{ lib +, fetchgit +, stdenv +, rtl +, verilator +, enable-trace ? true +, zlib +}: + +let + rocket-chip-v-src = fetchgit { + url = "https://github.com/chipsalliance/rocket-chip.git"; + rev = "833385404d9c722bdfad3e453c19a3ac6f40dbf0"; + fetchSubmodules = false; + sparseCheckout = [ + "src/main/resources/vsrc" + ]; + hash = "sha256-CUq9VDwb7ZtclosgOWfDZMOpH+U/yBjL5CNiXZRiB80="; + }; +in +stdenv.mkDerivation { + name = "t1-rocketv-verilated"; + + src = rtl; + + nativeBuildInputs = [ verilator ]; + + propagatedBuildInputs = lib.optionals enable-trace [ zlib ]; + + env.rocketChipVSrc = "${rocket-chip-v-src}/src/main/resources/vsrc/"; + + buildPhase = '' + runHook preBuild + + echo "[nix] running verilator" + # FIXME: fix all the warnings and remove the -Wno- flags here + verilator \ + -I"$rocketChipVSrc" \ + ${lib.optionalString enable-trace "--trace-fst"} \ + --timing \ + --threads 8 \ + --threads-max-mtasks 8000 \ + -O1 \ + -Wno-WIDTHEXPAND \ + -Wno-LATCH \ + --cc TestBench + + echo "[nix] building verilated C lib" + + # backup srcs + mkdir -p $out/share + cp -r obj_dir $out/share/verilated_src + + rm $out/share/verilated_src/*.dat + + # We can't use -C here because VTestBench.mk is generated with relative paths + cd obj_dir + make -j "$NIX_BUILD_CORES" -f VTestBench.mk libVTestBench + + runHook postBuild + ''; + + hardeningDisable = [ "fortify" ]; + + passthru = { + inherit enable-trace rocket-chip-v-src; + }; + + installPhase = '' + runHook preInstall + + mkdir -p $out/include $out/lib + cp *.h $out/include + cp *.a $out/lib + + runHook postInstall + ''; +} diff --git a/rocketemu/offline/Cargo.toml b/rocketemu/offline/Cargo.toml new file mode 100644 index 000000000..2824a161e --- /dev/null +++ b/rocketemu/offline/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "offline" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +num-bigint = { workspace = true } + +libloading = "0.8.1" +xmas-elf = "0.9.1" + +common = { path = "../test_common" } +spike_rs = { path = "../spike_rs" } diff --git a/rocketemu/offline/src/difftest.rs b/rocketemu/offline/src/difftest.rs new file mode 100644 index 000000000..573fbb756 --- /dev/null +++ b/rocketemu/offline/src/difftest.rs @@ -0,0 +1,54 @@ +use common::spike_runner::SpikeRunner; +use std::path::Path; + +use common::rtl_config::RTLConfig; +use common::CommonArgs; + +use crate::dut::Dut; +use crate::json_events::*; + +pub struct Difftest { + runner: SpikeRunner, + dut: Dut, + + #[allow(dead_code)] + config: RTLConfig, +} + +impl Difftest { + pub fn new(args: CommonArgs) -> Self { + let config = RTLConfig { vlen: args.vlen, dlen: args.dlen }; + Self { + runner: SpikeRunner::new(&args, true),
dut: Dut::new(Path::new( + &args.log_file.expect("difftest must be run with a log file"), + )), + config, + } + } + + pub fn diff(&mut self) -> anyhow::Result<()> { + loop { + let se = self.runner.spike_step(); + if se.is_exit() { + return Err(anyhow::anyhow!("exit detected")); + } + if se.is_rd_written() && se.rd_idx != 0 { + let event = self.dut.step()?; + + match event { + JsonEvents::RegWrite { addr, data, cycle } => { + self.runner.cycle = *cycle; + self.runner.check_reg_write( + &RegWriteEvent { addr: *addr, data: *data, cycle: *cycle }, + &se, + )? + } + JsonEvents::SimulationStop { reason, cycle } => { + return Err(anyhow::anyhow!("[{}] simulation stop: {}", *cycle, *reason)); + } + } + } + } + } +} diff --git a/rocketemu/offline/src/dut.rs b/rocketemu/offline/src/dut.rs new file mode 100644 index 000000000..a4cc80821 --- /dev/null +++ b/rocketemu/offline/src/dut.rs @@ -0,0 +1,48 @@ +use anyhow::Context; +use std::io::BufRead; +use std::path::Path; + +use crate::json_events::JsonEvents; + +#[derive(Debug)] +pub struct Dut { + events: Vec<JsonEvents>, + idx: u32, +} + +impl Dut { + fn read_json(path: &Path) -> anyhow::Result<Vec<JsonEvents>> { + let file = std::fs::File::open(path).unwrap(); + let reader = std::io::BufReader::new(file); + + let mut events = Vec::new(); + + for (i, line) in reader.lines().enumerate() { + let line = line.expect("line read error"); + if line.starts_with("{") { + // ignore illegal lines + let event: JsonEvents = serde_json::from_str(&line) + .with_context(|| format!("parsing {} line {}", path.display(), i + 1))?; + events.push(event); + } + } + + Ok(events) + } + + pub fn new(path: &Path) -> Self { + let events = Self::read_json(path).unwrap(); + let idx = 0; + Self { events, idx } + } + + pub fn step(&mut self) -> anyhow::Result<&JsonEvents> { + let event = match self.events.get(self.idx as usize) { + Some(event) => event, + None => return Err(anyhow::anyhow!("no more events")), + }; + self.idx += 1; + + Ok(event) + } +} diff --git a/rocketemu/offline/src/json_events.rs b/rocketemu/offline/src/json_events.rs new file mode 100644 index 000000000..ed61c23da --- /dev/null +++ b/rocketemu/offline/src/json_events.rs @@ -0,0 +1,39 @@ +use common::spike_runner::SpikeRunner; +use serde::Deserialize; +use spike_rs::spike_event::SpikeEvent; +use tracing::info; + +#[derive(Deserialize, Debug)] +#[serde(tag = "event")] +pub(crate) enum JsonEvents { + RegWrite { addr: u32, data: u32, cycle: u64 }, + SimulationStop { reason: u8, cycle: u64 }, +} + +pub struct RegWriteEvent { + pub addr: u32, + pub data: u32, + pub cycle: u64, +} + +pub(crate) trait JsonEventRunner { + fn check_reg_write(&mut self, reg_write: &RegWriteEvent, se: &SpikeEvent) -> anyhow::Result<()>; +} + +impl JsonEventRunner for SpikeRunner { + fn check_reg_write(&mut self, reg_write: &RegWriteEvent, se: &SpikeEvent) -> anyhow::Result<()> { + let addr = reg_write.addr; + let data = reg_write.data; + let cycle = reg_write.cycle; + + info!("[{cycle}] RegWrite: idx={addr:02x}, data={data:08x}",); + info!( + "[{cycle}] SpikeEvent: idx={:02x}, data={:08x}", + se.rd_idx, se.rd_bits + ); + assert_eq!(addr, se.rd_idx, "addr should be equal to se.rd_idx"); + assert_eq!(data, se.rd_bits, "data should be equal to se.rd_bits"); + + Ok(()) + } +}
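Aside (not part of the patch): each line of the DUT log is expected to be a self-describing JSON object whose "event" field selects the JsonEvents variant, matching the printf format emitted by TestBench.scala later in this patch. A minimal sketch of how one such line deserializes; the literal values are made up:

let line = r#"{"event":"RegWrite","addr":10,"data":4096,"cycle":42}"#;
let event: JsonEvents = serde_json::from_str(line).unwrap();
// yields JsonEvents::RegWrite { addr: 10, data: 4096, cycle: 42 }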
diff --git a/rocketemu/offline/src/main.rs b/rocketemu/offline/src/main.rs new file mode 100644 index 000000000..0328e2cf3 --- /dev/null +++ b/rocketemu/offline/src/main.rs @@ -0,0 +1,57 @@ +mod difftest; +mod dut; +mod json_events; + +use clap::Parser; +use tracing::info; + +use common::spike_runner::SpikeRunner; +use common::CommonArgs; + +use crate::difftest::Difftest; + +fn run_spike(args: &CommonArgs) -> anyhow::Result<()> { + let mut count: u64 = 0; + + let spike = SpikeRunner::new(args, true); + loop { + count += 1; + if count % 1000000 == 0 { + info!("count = {}", count); + } + match spike.exec() { + Ok(_) => {} + Err(_) => { + info!("total v instructions count = {}", count); + info!("Simulation quit gracefully"); + return Ok(()); + } + }; + } +} + +fn main() -> anyhow::Result<()> { + // parse args + let args = CommonArgs::parse(); + + args.setup_logger()?; + + // if there is no log file, just run spike and quit + if args.log_file.is_none() { + run_spike(&args)?; + return Ok(()); + } + + // if there is a log file, run difftest + let mut diff = Difftest::new(args); + + loop { + match diff.diff() { + Ok(_) => {} + Err(e) => { + info!("Simulation quit/error with {}", e); + return Ok(()); + } + } + } +} diff --git a/rocketemu/spike_rs/Cargo.toml b/rocketemu/spike_rs/Cargo.toml new file mode 100644 index 000000000..411d44f72 --- /dev/null +++ b/rocketemu/spike_rs/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "spike_rs" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +tracing = { workspace = true } +libc = "0.2.155" +xmas-elf = "0.9.1" diff --git a/rocketemu/spike_rs/build.rs b/rocketemu/spike_rs/build.rs new file mode 100644 index 000000000..9399fdaf0 --- /dev/null +++ b/rocketemu/spike_rs/build.rs @@ -0,0 +1,18 @@ +use std::env; + +fn main() { + println!("cargo::rustc-link-search=native={}", env::var("SPIKE_LIB_DIR").expect("SPIKE_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=riscv"); + println!("cargo::rustc-link-lib=static=softfloat"); + println!("cargo::rustc-link-lib=static=disasm"); + println!("cargo::rustc-link-lib=static=fesvr"); + println!("cargo::rustc-link-lib=static=fdt"); + + println!("cargo::rustc-link-search=native={}", env::var("SPIKE_INTERFACES_LIB_DIR").expect("SPIKE_INTERFACES_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=spike_interfaces"); + + println!("cargo::rerun-if-env-changed=SPIKE_LIB_DIR"); + println!("cargo::rerun-if-env-changed=SPIKE_INTERFACES_LIB_DIR"); + + println!("cargo::rustc-link-lib=stdc++"); +} diff --git a/rocketemu/spike_rs/src/lib.rs b/rocketemu/spike_rs/src/lib.rs new file mode 100644 index 000000000..3744d4a72 --- /dev/null +++ b/rocketemu/spike_rs/src/lib.rs @@ -0,0 +1,289 @@ +pub mod spike_event; +pub mod util; + +use libc::c_char; +use std::ffi::{CStr, CString}; +use tracing::trace; + +pub fn clip(binary: u32, a: i32, b: i32) -> u32 { + assert!(a <= b, "a should be less than or equal to b"); + let nbits = b - a + 1; + let mask = if nbits >= 32 { + u32::MAX + } else { + (1 << nbits) - 1 + }; + (binary >> a) & mask +}
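Aside (not part of the patch): clip extracts the inclusive bit range [a, b] of a 32-bit word, and the nbits >= 32 branch avoids the overflowing 1 << 32 shift when the full word is requested. A hypothetical check:

#[test]
fn clip_extracts_inclusive_ranges() {
    assert_eq!(clip(0b1010_0110, 1, 3), 0b011); // bits 3..=1
    assert_eq!(clip(u32::MAX, 0, 31), u32::MAX); // full-width range exercises the nbits >= 32 guard
}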
+ +pub struct Spike { + spike: *mut (), + pub mem: Vec<u8>, + pub size: usize, +} + +unsafe impl Send for Spike {} + +extern "C" fn default_addr_to_mem(target: *mut (), addr: u64) -> *mut u8 { + let spike = target as *mut Spike; + let addr = addr as usize; + unsafe { + let spike: &mut Spike = &mut *spike; + let ptr = spike.mem.as_mut_ptr().offset(addr as isize); + ptr + } +} + +type FfiCallback = extern "C" fn(*mut (), u64) -> *mut u8; + +impl Spike { + // we need a boxed Spike, since its pointer will be passed to C to perform the FFI callback + pub fn new(arch: &str, set: &str, lvl: &str, lane_number: usize, mem_size: usize) -> Box<Self> { + let arch = CString::new(arch).unwrap(); + let set = CString::new(set).unwrap(); + let lvl = CString::new(lvl).unwrap(); + let spike = unsafe { spike_new(arch.as_ptr(), set.as_ptr(), lvl.as_ptr(), lane_number) }; + let mut self_: Box<Self> = Box::new(Spike { spike, mem: vec![0; mem_size], size: mem_size }); + + // TODO: support customized ffi + let ffi_target: *mut Spike = &mut *self_; + unsafe { + spike_register_callback(ffi_target as *mut (), default_addr_to_mem); + } + + self_ + } + + pub fn get_proc(&self) -> Processor { + let processor = unsafe { spike_get_proc(self.spike) }; + Processor { processor } + } + + pub fn load_bytes_to_mem( + &mut self, + addr: usize, + len: usize, + bytes: Vec<u8>, + ) -> anyhow::Result<()> { + trace!("ld: addr: 0x{:x}, len: 0x{:x}", addr, len); + assert!(addr + len <= self.size); + + let dst = &mut self.mem[addr..addr + len]; + for (i, byte) in bytes.iter().enumerate() { + dst[i] = *byte; + } + + Ok(()) + } + + pub fn mem_byte_on_addr(&self, addr: usize) -> anyhow::Result<u8> { + Ok(self.mem[addr]) + } +} + +impl Drop for Spike { + fn drop(&mut self) { + unsafe { spike_destruct(self.spike) } + } +} + +pub struct Processor { + processor: *mut (), +} + +impl Processor { + pub fn disassemble(&self) -> String { + let bytes = unsafe { proc_disassemble(self.processor) }; + let c_str = unsafe { CStr::from_ptr(bytes as *mut c_char) }; + format!("{}", c_str.to_string_lossy()) + } + + pub fn reset(&self) { + unsafe { proc_reset(self.processor) } + } + + pub fn get_state(&self) -> State { + let state = unsafe { proc_get_state(self.processor) }; + State { state } + } + + pub fn func(&self) -> u64 { + unsafe { proc_func(self.processor) } + } + + pub fn get_insn(&self) -> u32 { + unsafe { proc_get_insn(self.processor) as u32 } + } + + pub fn get_vreg_data(&self, idx: u32, offset: u32) -> u8 { + unsafe { proc_get_vreg_data(self.processor, idx, offset) } + } + + pub fn get_rs1(&self) -> u32 { + unsafe { proc_get_rs1(self.processor) } + } + + pub fn get_rs2(&self) -> u32 { + unsafe { proc_get_rs2(self.processor) } + } + + pub fn get_rd(&self) -> u32 { + unsafe { proc_get_rd(self.processor) } + } + + // vu + pub fn vu_get_vtype(&self) -> u32 { + unsafe { proc_vu_get_vtype(self.processor) as u32 } + } + + pub fn vu_get_vxrm(&self) -> u32 { + unsafe { proc_vu_get_vxrm(self.processor) } + } + + pub fn vu_get_vnf(&self) -> u32 { + unsafe { proc_vu_get_vnf(self.processor) } + } + + pub fn vu_get_vill(&self) -> bool { + unsafe { proc_vu_get_vill(self.processor) } + } + + pub fn vu_get_vxsat(&self) -> bool { + unsafe { proc_vu_get_vxsat(self.processor) } + } + + pub fn vu_get_vl(&self) -> u32 { + unsafe { proc_vu_get_vl(self.processor) } + } + + pub fn vu_get_vstart(&self) -> u16 { + unsafe { proc_vu_get_vstart(self.processor) } + } +} + +impl Drop for Processor { + fn drop(&mut self) { + unsafe { proc_destruct(self.processor) } + } +} + +pub struct State { + state: *mut (), +} + +impl State { + pub fn set_pc(&self, pc: u64) { + unsafe { state_set_pc(self.state, pc) } + } + + pub fn get_pc(&self) -> u64 { + unsafe { state_get_pc(self.state) } + } + + pub fn handle_pc(&self, pc: u64) -> anyhow::Result<()> { + match unsafe { state_handle_pc(self.state, pc) } { + 0 => Ok(()), + _ => Err(anyhow::anyhow!("Error handling pc")), + } + } + + pub fn get_reg(&self, idx: u32, is_fp: bool) -> u32 { + unsafe { state_get_reg(self.state, idx, is_fp) } + } + + pub fn get_reg_write_size(&self) -> u32 { + unsafe { state_get_reg_write_size(self.state) } + } + + pub fn get_reg_write_index(&self, index: u32) -> u32 { + unsafe { state_get_reg_write_index(self.state, index) } + }
+ pub fn get_mem_write_size(&self) -> u32 { + unsafe { state_get_mem_write_size(self.state) } + } + + pub fn get_mem_write(&self, index: u32) -> (u32, u64, u8) { + let addr = unsafe { state_get_mem_write_addr(self.state, index) }; + let value = unsafe { state_get_mem_write_value(self.state, index) }; + let size_by_byte = unsafe { state_get_mem_write_size_by_byte(self.state, index) }; + (addr, value, size_by_byte) + } + + pub fn get_mem_read_size(&self) -> u32 { + unsafe { state_get_mem_read_size(self.state) } + } + + pub fn get_mem_read(&self, index: u32) -> (u32, u8) { + let addr = unsafe { state_get_mem_read_addr(self.state, index) }; + let size_by_byte = unsafe { state_get_mem_read_size_by_byte(self.state, index) }; + (addr, size_by_byte) + } + + pub fn set_mcycle(&self, mcycle: usize) { + unsafe { state_set_mcycle(self.state, mcycle) } + } + + pub fn clear(&self) { + unsafe { state_clear(self.state) } + } + + pub fn exit(&self) -> u64 { + unsafe { state_exit(self.state) } + } +} + +impl Drop for State { + fn drop(&mut self) { + unsafe { state_destruct(self.state) } + } +} + +#[link(name = "spike_interfaces")] +extern "C" { + pub fn spike_register_callback(target: *mut (), callback: FfiCallback); + fn spike_new( + arch: *const c_char, + set: *const c_char, + lvl: *const c_char, + lane_number: usize, + ) -> *mut (); + fn spike_get_proc(spike: *mut ()) -> *mut (); + fn spike_destruct(spike: *mut ()); + fn proc_disassemble(proc: *mut ()) -> *mut c_char; + fn proc_reset(proc: *mut ()); + fn proc_get_state(proc: *mut ()) -> *mut (); + fn proc_func(proc: *mut ()) -> u64; + fn proc_get_insn(proc: *mut ()) -> u64; + fn proc_get_vreg_data(proc: *mut (), vreg_idx: u32, vreg_offset: u32) -> u8; + fn proc_get_rs1(proc: *mut ()) -> u32; + fn proc_get_rs2(proc: *mut ()) -> u32; + fn proc_get_rd(proc: *mut ()) -> u32; + + fn proc_vu_get_vtype(proc: *mut ()) -> u64; + fn proc_vu_get_vxrm(proc: *mut ()) -> u32; + fn proc_vu_get_vnf(proc: *mut ()) -> u32; + fn proc_vu_get_vill(proc: *mut ()) -> bool; + fn proc_vu_get_vxsat(proc: *mut ()) -> bool; + fn proc_vu_get_vl(proc: *mut ()) -> u32; + fn proc_vu_get_vstart(proc: *mut ()) -> u16; + + fn proc_destruct(proc: *mut ()); + fn state_set_pc(state: *mut (), pc: u64); + fn state_get_pc(state: *mut ()) -> u64; + fn state_get_reg(state: *mut (), index: u32, is_fp: bool) -> u32; + fn state_get_reg_write_size(state: *mut ()) -> u32; + fn state_get_reg_write_index(state: *mut (), index: u32) -> u32; + fn state_get_mem_write_size(state: *mut ()) -> u32; + fn state_get_mem_write_addr(state: *mut (), index: u32) -> u32; + fn state_get_mem_write_value(state: *mut (), index: u32) -> u64; + fn state_get_mem_write_size_by_byte(state: *mut (), index: u32) -> u8; + fn state_get_mem_read_size(state: *mut ()) -> u32; + fn state_get_mem_read_addr(state: *mut (), index: u32) -> u32; + fn state_get_mem_read_size_by_byte(state: *mut (), index: u32) -> u8; + fn state_handle_pc(state: *mut (), pc: u64) -> u64; + fn state_set_mcycle(state: *mut (), mcycle: usize); + fn state_clear(state: *mut ()); + fn state_destruct(state: *mut ()); + fn state_exit(state: *mut ()) -> u64; +} diff --git a/rocketemu/spike_rs/src/spike_event.rs b/rocketemu/spike_rs/src/spike_event.rs new file mode 100644 index 000000000..7f6a2f030 --- /dev/null +++ b/rocketemu/spike_rs/src/spike_event.rs @@ -0,0 +1,352 @@ +use std::collections::HashMap; +use tracing::trace; + +use crate::clip; +use crate::Spike; + +#[derive(Debug, Clone)] +pub struct SingleMemWrite { + pub val: u8, + pub executed: bool, // set to true when the RTL executes this memory access +}
+ +#[derive(Debug, Clone)] +pub struct SingleMemRead { + pub val: u8, + pub executed: bool, // set to true when the RTL executes this memory access +} + +#[derive(Debug, Clone)] +pub struct MemWriteRecord { + pub writes: Vec<SingleMemWrite>, + pub num_completed_writes: usize, +} + +#[derive(Debug, Clone)] +pub struct MemReadRecord { + pub reads: Vec<SingleMemRead>, + pub num_completed_reads: usize, +} + +#[derive(Debug, Clone)] +pub struct SingleVrfWrite { + pub byte: u8, + pub executed: bool, // set to true when the RTL executes this VRF write +} + +#[derive(Default, Debug, Clone)] +pub struct VdWriteRecord { + vd_bytes: Vec<u8>, +} + +#[derive(Default, Debug, Clone)] +pub struct MemAccessRecord { + pub all_writes: HashMap<u32, MemWriteRecord>, + pub all_reads: HashMap<u32, MemReadRecord>, +} + +#[derive(Default, Debug, Clone)] +pub struct VrfAccessRecord { + pub all_writes: HashMap<usize, SingleVrfWrite>, + pub unretired_writes: Option<u32>, + pub retired_writes: u32, +} + +pub const LSU_IDX_DEFAULT: u8 = 0xff; +pub const ISSUE_IDX_DEFAULT: u8 = 0xff; + +#[derive(Default, Debug, Clone)] +pub struct SpikeEvent { + pub do_log_vrf: bool, + + // index + pub lsu_idx: u8, + pub issue_idx: u8, + + // instruction + pub disasm: String, + pub pc: u64, + pub inst_bits: u32, + + // scalar-to-vector interface (used by the driver) + pub rs1: u32, + pub rs2: u32, + pub rs1_bits: u32, + pub rs2_bits: u32, + pub rd_idx: u32, + + // vtype + pub vtype: u32, + pub vxrm: u32, + pub vnf: u32, + + // other CSRs + pub vill: bool, + pub vxsat: bool, + pub vl: u32, + pub vstart: u16, + + // rd + pub rd_bits: u32, + + // mutable states + pub is_rd_written: bool, + pub vd_write_record: VdWriteRecord, + pub mem_access_record: MemAccessRecord, + pub vrf_access_record: VrfAccessRecord, + + pub exit: bool, +} + +impl SpikeEvent { + pub fn new(spike: &Spike, do_log_vrf: bool) -> Self { + let proc = spike.get_proc(); + let state = proc.get_state(); + let inst_bits = proc.get_insn(); + + let opcode = clip(inst_bits, 0, 6); + let width = clip(inst_bits, 12, 14); + + let is_rs_fp = opcode == 0b1010111 && width == 0b101 /* OPFVF */; + // early return vsetvl scalar instruction + + // rs1, rs2 + let (rs1, rs2) = (proc.get_rs1(), proc.get_rs2()); + + SpikeEvent { + do_log_vrf, + + lsu_idx: LSU_IDX_DEFAULT, + issue_idx: ISSUE_IDX_DEFAULT, + + disasm: spike.get_proc().disassemble(), + pc: proc.get_state().get_pc(), + inst_bits, + + rs1, + rs2, + rs1_bits: state.get_reg(rs1, is_rs_fp), + rs2_bits: state.get_reg(rs2, is_rs_fp), + rd_idx: proc.get_rd(), + + vtype: proc.vu_get_vtype(), + vxrm: proc.vu_get_vxrm(), + vnf: proc.vu_get_vnf(), + + vill: proc.vu_get_vill(), + vxsat: proc.vu_get_vxsat(), + vl: proc.vu_get_vl(), + vstart: proc.vu_get_vstart(), + + rd_bits: Default::default(), + + is_rd_written: false, + vd_write_record: Default::default(), + mem_access_record: Default::default(), + vrf_access_record: Default::default(), + + exit: false, + } + } + + pub fn opcode(&self) -> u32 { + clip(self.inst_bits, 0, 6) + } + + pub fn width(&self) -> u32 { + clip(self.inst_bits, 12, 14) + } + + pub fn rs1(&self) -> u32 { + clip(self.inst_bits, 15, 19) + } + + pub fn csr(&self) -> u32 { + clip(self.inst_bits, 20, 31) + } + + pub fn funct6(&self) -> u32 { + clip(self.inst_bits, 26, 31) + } + + pub fn mop(&self) -> u32 { + clip(self.inst_bits, 26, 27) + } + + pub fn lumop(&self) -> u32 { + clip(self.inst_bits, 20, 24) + } + + pub fn vm(&self) -> bool { + clip(self.inst_bits, 25, 25) != 0 + }
+ + // check whether the instruction is a vector load + pub fn is_vload(&self) -> bool { + self.opcode() == 0b0000111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + // check whether the instruction is a vector store + pub fn is_vstore(&self) -> bool { + self.opcode() == 0b0100111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + pub fn is_v(&self) -> bool { + (self.opcode() == 0b1010111 || self.is_vload() || self.is_vstore()) && !self.is_vsetvl() + } + + pub fn is_vsetvl(&self) -> bool { + self.opcode() == 0b1010111 && self.width() == 0b111 + } + + pub fn is_scalar(&self) -> bool { + !self.is_v() + } + + // check whether the instruction is a scalar load + pub fn is_load(&self) -> bool { + self.opcode() == 0b0000011 || self.is_cl() + } + + // check whether the instruction is a scalar store + pub fn is_store(&self) -> bool { + self.opcode() == 0b0100011 || self.is_cw() + } + + pub fn is_rd_written(&self) -> bool { + self.is_rd_written + } + + pub fn is_whole(&self) -> bool { + self.mop() == 0 && self.lumop() == 8 + } + + pub fn is_widening(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_mask_vd(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_exit(&self) -> bool { + self.exit + } + + pub fn is_rd_fp(&self) -> bool { + (self.opcode() == 0b1010111) + && (self.rs1 == 0) + && (self.funct6() == 0b010000) + && self.vm() + && (self.width() == 0b001) + } + + pub fn c_op(&self) -> u32 { + clip(self.inst_bits, 0, 1) + } + + pub fn c_func3(&self) -> u32 { + clip(self.inst_bits, 13, 15) + } + + pub fn is_cl(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 == 0 ) || /* c.lw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 == 0 ) /* c.lwsp */ + } + + pub fn is_cw(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 != 0 ) || /* c.sw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 != 0 ) /* c.swsp */ + } + + pub fn vlmul(&self) -> u32 { + clip(self.vtype, 0, 2) + } + + pub fn vma(&self) -> bool { + clip(self.vtype, 7, 7) != 0 + } + + pub fn vta(&self) -> bool { + clip(self.vtype, 6, 6) != 0 + } + + pub fn vsew(&self) -> u32 { + clip(self.vtype, 3, 5) + } + + pub fn vcsr(&self) -> u32 { + self.vxsat as u32 | self.vxrm << 1 + } + + pub fn describe_insn(&self) -> String { + format!( + "pc={:#x}, disasm='{}', bits={:#x}", + self.pc as u32, self.disasm, self.inst_bits + ) + } + + pub fn log_reg_write(&mut self, spike: &Spike) -> anyhow::Result<()> { + let proc = spike.get_proc(); + let state = proc.get_state(); + // in spike, log_reg_write is arranged as: + // xx0000 <- x + // xx0001 <- f + // xx0010 <- vreg + // xx0011 <- vec + // xx0100 <- csr + let reg_write_size = state.get_reg_write_size(); + // TODO: refactor it.
+ (0..reg_write_size).for_each(|idx| match state.get_reg_write_index(idx) & 0xf { + 0b0000 => { + // scalar rf + let data = state.get_reg(self.rd_idx, false); + self.is_rd_written = true; + self.rd_bits = data; + trace!("ScalarRFChange: idx={:02x}, data={:08x}", self.rd_idx, self.rd_bits); + } + 0b0001 => { + let data = state.get_reg(self.rd_idx, true); + self.is_rd_written = true; + self.rd_bits = data; + trace!("FloatRFChange: idx={:02x}, data={:08x}", self.rd_idx, self.rd_bits); + } + _ => trace!( + "UnknownRegChange, idx={:02x}, spike detected an unknown reg change", + state.get_reg_write_index(idx) + ), + }); + + Ok(()) + } + + pub fn log_mem_write(&mut self, spike: &Spike) -> anyhow::Result<()> { + let proc = spike.get_proc(); + let state = proc.get_state(); + + let mem_write_size = state.get_mem_write_size(); + (0..mem_write_size).for_each(|i| { + let (addr, value, size) = state.get_mem_write(i); + (0..size).for_each(|offset| { + self + .mem_access_record + .all_writes + .entry(addr + offset as u32) + .or_insert(MemWriteRecord { writes: vec![], num_completed_writes: 0 }) + .writes + .push(SingleMemWrite { + val: (value >> (offset * 8)) as u8, + executed: false, + }); + }); + trace!("SpikeMemWrite: addr={addr:x}, value={value:x}, size={size}"); + if addr == 0x4000_0000 && value == 0xdead_beef && size == 4 { + self.exit = true; + return; + } + }); + + Ok(()) + } +} diff --git a/rocketemu/spike_rs/src/util.rs b/rocketemu/spike_rs/src/util.rs new file mode 100644 index 000000000..6ded0eec5 --- /dev/null +++ b/rocketemu/spike_rs/src/util.rs @@ -0,0 +1,65 @@ +use crate::Spike; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use xmas_elf::program::{ProgramHeader, Type}; +use xmas_elf::{header, ElfFile}; + +pub fn load_elf(spike: &mut Spike, fname: &Path) -> anyhow::Result<u64> { + let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + spike.load_bytes_to_mem(addr, size, slice.to_vec()).unwrap(); + } + } + } + + Ok(header.pt2.entry_point()) +} + +// todo: unify load_elf and load_elf_to_buffer +pub fn load_elf_to_buffer(mem: &mut [u8], fname: &Path) -> anyhow::Result<u64> { + let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + + let dst: &mut _ = &mut mem[addr..addr + size]; + for (i, byte) in slice.iter().enumerate() { + dst[i] = *byte; + } + } + } + } + + Ok(header.pt2.entry_point()) +}
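Aside (not part of the patch): both loaders copy only the 32-bit PT_LOAD segments into flat memory and return the ELF entry point. A usage sketch for the buffer-based variant; the memory size and ELF path are made up for illustration:

use spike_rs::util::load_elf_to_buffer;
use std::path::Path;

fn load_example() -> anyhow::Result<()> {
    let mut mem = vec![0u8; 1usize << 30]; // flat memory, like the driver's `mem` vector
    let entry = load_elf_to_buffer(&mut mem, Path::new("tests/smoke.elf"))?;
    println!("entry point: {entry:#x}");
    Ok(())
}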
diff --git a/rocketemu/src/AXI4SlaveAgent.scala b/rocketemu/src/AXI4SlaveAgent.scala new file mode 100644 index 000000000..8c5937476 --- /dev/null +++ b/rocketemu/src/AXI4SlaveAgent.scala @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022-2024 Jiuyang Liu + +package org.chipsalliance.t1.rocketv.dpi + +// TODO: upstream to AMBA as VIP +import chisel3._ +import chisel3.util.circt.dpi.{RawClockedVoidFunctionCall, RawUnclockedNonVoidFunctionCall} +import chisel3.util.{isPow2, log2Ceil} +import org.chipsalliance.amba.axi4.bundle.{ARChannel, ARFlowControl, AWChannel, AWFlowControl, AXI4BundleParameter, AXI4ROIrrevocableVerilog, AXI4RWIrrevocableVerilog, AXI4WOIrrevocableVerilog, BChannel, BFlowControl, RChannel, RFlowControl, WChannel, WFlowControl} + +case class AXI4SlaveAgentParameter(name: String, axiParameter: AXI4BundleParameter, outstanding: Int, readPayloadSize: Int, writePayloadSize: Int) + +class AXI4SlaveAgentInterface(parameter: AXI4SlaveAgentParameter) extends Bundle { + val clock: Clock = Input(Clock()) + val reset: Reset = Input(Reset()) + val channelId: UInt = Input(Const(UInt(64.W))) + // don't issue read DPI + val gateRead: Bool = Input(Bool()) + // don't issue write DPI + val gateWrite: Bool = Input(Bool()) + val channel = Flipped( + org.chipsalliance.amba.axi4.bundle.verilog.irrevocable(parameter.axiParameter) + ) +} + +class WritePayload(length: Int, dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) + // For dataWidth <= 8, align strb to u8 for a simple C-API + val strb = Vec(length, UInt(math.max(8, dataWidth / 8).W)) +} + +class ReadPayload(length: Int, dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) +} + +// consume transaction from DPI, drive RTL signal +class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) + extends FixedIORawModule[AXI4SlaveAgentInterface](new AXI4SlaveAgentInterface(parameter)) { + dontTouch(io) + io.channel match { + case channel: AXI4RWIrrevocableVerilog => + new WriteManager(channel) + new ReadManager(channel) + case channel: AXI4ROIrrevocableVerilog => + new ReadManager(channel) + case channel: AXI4WOIrrevocableVerilog => + new WriteManager(channel) + } + + private class WriteManager( + channel: AWChannel with AWFlowControl with WChannel with WFlowControl with BChannel with BFlowControl) { + withClockAndReset(io.clock, io.reset) { + /** There is an aw in the register. */ + val awIssued = RegInit(false.B) + /** There is a w in the register. */ + val last = RegInit(false.B) + + /** memory to store the write payload + * @todo limit the payload size based on the RTL configuration. + */ + val writePayload = RegInit(0.U.asTypeOf(new WritePayload(parameter.writePayloadSize, parameter.axiParameter.dataWidth))) + /** AWID, latch at AW fire, used at B fire.
*/ + val awid = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWID))) + val awaddr = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWADDR))) + val awlen = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLEN))) + val awsize = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWSIZE))) + val awburst = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWBURST))) + val awlock = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLOCK))) + val awcache = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWCACHE))) + val awprot = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWPROT))) + val awqos = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWQOS))) + val awregion = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWREGION))) + val awuser = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWUSER))) + + /** index the payload, used to write [[writePayload]] */ + val writeIdx = RegInit(0.U.asTypeOf(UInt(8.W))) + val bFire = channel.BREADY && channel.BVALID + val awFire = channel.AWREADY && channel.AWVALID + val wLastFire = channel.WVALID && channel.WREADY && channel.WLAST + val awExist = channel.AWVALID || awIssued + val wExist = channel.WVALID && channel.WLAST || last + + // AW + channel.AWREADY := !awIssued || (wExist && channel.BREADY) + when(channel.AWREADY && channel.AWVALID) { + awid := channel.AWID + awaddr := channel.AWADDR + awlen := channel.AWLEN + awsize := channel.AWSIZE + awburst := channel.AWBURST + awlock := channel.AWLOCK + awcache := channel.AWCACHE + awprot := channel.AWPROT + awqos := channel.AWQOS + awregion := channel.AWREGION + awuser := channel.AWUSER + } + when(awFire ^ bFire) { + awIssued := awFire + } + + // W + val writePayloadUpdate = WireDefault(writePayload) + channel.WREADY := !last || (awExist && channel.BREADY) + when(channel.WVALID && channel.WREADY) { + writePayload.data(writeIdx) := channel.WDATA + writePayloadUpdate.data(writeIdx) := channel.WDATA + writePayload.strb(writeIdx) := channel.WSTRB.pad(writePayload.strb.getWidth) + writePayloadUpdate.strb(writeIdx) := channel.WSTRB.pad(writePayload.strb.getWidth) + writeIdx := writeIdx + 1.U + when(channel.WLAST) { + writeIdx := 0.U + } + } + when(wLastFire ^ bFire) { + last := wLastFire + } + + // B + channel.BVALID := awExist && wExist + channel.BID := Mux(awIssued, awid, channel.AWID) + channel.BRESP := 0.U(2.W) // OK + channel.BUSER := Mux(awIssued, awuser, channel.AWUSER) + when(channel.BVALID && channel.BREADY) { + RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( + io.clock, + when.cond && !io.gateWrite, + io.channelId, + // handle AW and W at same beat. 
+ Mux(awIssued, awid.asTypeOf(UInt(64.W)), channel.AWID), + Mux(awIssued, awaddr.asTypeOf(UInt(64.W)), channel.AWADDR), + Mux(awIssued, awlen.asTypeOf(UInt(64.W)), channel.AWLEN), + Mux(awIssued, awsize.asTypeOf(UInt(64.W)), channel.AWSIZE), + Mux(awIssued, awburst.asTypeOf(UInt(64.W)), channel.AWBURST), + Mux(awIssued, awlock.asTypeOf(UInt(64.W)), channel.AWLOCK), + Mux(awIssued, awcache.asTypeOf(UInt(64.W)), channel.AWCACHE), + Mux(awIssued, awprot.asTypeOf(UInt(64.W)), channel.AWPROT), + Mux(awIssued, awqos.asTypeOf(UInt(64.W)), channel.AWQOS), + Mux(awIssued, awregion.asTypeOf(UInt(64.W)), channel.AWREGION), + writePayloadUpdate + ) + } + } + } + + private class ReadManager(channel: ARChannel with ARFlowControl with RChannel with RFlowControl) { + withClockAndReset(io.clock, io.reset) { + class CAMValue extends Bundle { + val arid = UInt(16.W) + val arlen = UInt(8.W) + val readPayload = new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth) + val readPayloadIndex = UInt(8.W) + val valid = Bool() + } + /** CAM to maintain order of read requests. This is maintained as FIFO. */ + val cam: Vec[CAMValue] = RegInit(0.U.asTypeOf(Vec(parameter.outstanding, new CAMValue))) + require(isPow2(parameter.outstanding), "Need to handle pointers") + val arPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) + val rPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) + + // AR + channel.ARREADY := !cam(arPtr).valid + when(channel.ARREADY && channel.ARVALID) { + cam(arPtr).arid := channel.ARID + cam(arPtr).arlen := channel.ARLEN + cam(arPtr).readPayload := RawUnclockedNonVoidFunctionCall(s"axi_read_${parameter.name}", new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth))( + when.cond && !io.gateRead, + io.channelId, + channel.ARID.asTypeOf(UInt(64.W)), + channel.ARADDR.asTypeOf(UInt(64.W)), + channel.ARLEN.asTypeOf(UInt(64.W)), + channel.ARSIZE.asTypeOf(UInt(64.W)), + channel.ARBURST.asTypeOf(UInt(64.W)), + channel.ARLOCK.asTypeOf(UInt(64.W)), + channel.ARCACHE.asTypeOf(UInt(64.W)), + channel.ARPROT.asTypeOf(UInt(64.W)), + channel.ARQOS.asTypeOf(UInt(64.W)), + channel.ARREGION.asTypeOf(UInt(64.W)) + ) + cam(arPtr).readPayloadIndex := 0.U + cam(arPtr).valid := true.B + arPtr := arPtr + 1.U + } + + // R + channel.RVALID := cam(rPtr).valid + channel.RID := cam(rPtr).arid + channel.RDATA := cam(rPtr).readPayload.data(cam(rPtr).readPayloadIndex) + channel.RRESP := 0.U // OK + channel.RLAST := (cam(rPtr).arlen === cam(rPtr).readPayloadIndex) && cam(rPtr).valid + channel.RUSER := DontCare + when(channel.RREADY && channel.RVALID) { + // increase index + cam(rPtr).readPayloadIndex := cam(rPtr).readPayloadIndex + 1.U + when(channel.RLAST) { + cam(rPtr).valid := false.B + rPtr := rPtr + 1.U + } + } + } + } +} diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala new file mode 100644 index 000000000..035012337 --- /dev/null +++ b/rocketemu/src/TestBench.scala @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rocketv + +import chisel3._ +import chisel3.experimental.{ExtModule, SerializableModuleGenerator} +import chisel3.experimental.dataview.DataViewable +import chisel3.probe.{Probe, define} +import chisel3.util.{log2Ceil, HasExtModuleInline, PopCount, UIntToOH, Valid} +import chisel3.util.circt.dpi.RawUnclockedNonVoidFunctionCall +import org.chipsalliance.amba.axi4.bundle._ +import org.chipsalliance.t1.rocketv.dpi._ +import 
org.chipsalliance.rocketv.{RocketTile, RocketTileParameter} + +class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTileParameter]) + extends RawModule + with ImplicitClock + with ImplicitReset { + val clockGen = Module(new ExtModule with HasExtModuleInline { + override def desiredName = "ClockGen" + setInline( + s"$desiredName.sv", + s"""module $desiredName(output reg clock, output reg reset); + | export "DPI-C" function dump_wave; + | function dump_wave(input string file); + | $$dumpfile(file); + | $$dumpvars(0); + | endfunction; + | + | import "DPI-C" function void cosim_init(); + | initial begin + | cosim_init(); + | clock = 1'b0; + | reset = 1'b1; + | end + | initial #(101) reset = 1'b0; + | always #10 clock = ~clock; + |endmodule + |""".stripMargin + ) + val clock = IO(Output(Bool())) + val reset = IO(Output(Bool())) + }) + + val clock: Clock = clockGen.clock.asClock + val reset: Bool = clockGen.reset + + override protected def implicitClock: Clock = clockGen.clock.asClock + override protected def implicitReset: Reset = clockGen.reset + + val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) + simulationTime := simulationTime + 1.U + + withClockAndReset(clock, reset) { + val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) + when(watchdog =/= 0.U) { + stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") + } + } + + val dut: RocketTile = withClockAndReset(clock, reset)(Module(generator.module())) + dut.io.clock := clockGen.clock.asClock + dut.io.reset := clockGen.reset + dut.io.hartid := 0.U + dut.io.debug := 0.U + dut.io.mtip := 0.U + dut.io.meip := 0.U + dut.io.msip := 0.U + dut.io.buserror := 0.U + + // get resetVector from simulator + dut.io.resetVector := RawUnclockedNonVoidFunctionCall("get_resetvector", Const(UInt(64.W)))(simulationTime === 0.U) + + // output probes + val rocketProbe = probe.read(dut.io.rocketProbe) + when(rocketProbe.rfWen && rocketProbe.rfWaddr =/= 0.U)(printf(cf"""{"event":"RegWrite","addr":${rocketProbe.rfWaddr},"data":${rocketProbe.rfWdata},"cycle":${simulationTime}}\n""")) + + // Memory Drivers + val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] + val instFetchAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = "instructionFetchAXI", + axiParameter = instFetchAXI.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) + ).suggestName("axi4_channel0_instructionFetchAXI") + ) + instFetchAgent.io.channel match { + case io: AXI4ROIrrevocableVerilog => io <> instFetchAXI + } + instFetchAgent.io.clock := clock + instFetchAgent.io.reset := reset + instFetchAgent.io.channelId := 0.U + instFetchAgent.io.gateRead := false.B + instFetchAgent.io.gateWrite := false.B + + val loadStoreAXI = dut.io.loadStoreAXI.viewAs[AXI4RWIrrevocableVerilog] + val loadStoreAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = "loadStoreAXI", + axiParameter = loadStoreAXI.parameter, + outstanding = 4, + readPayloadSize = 8, // todo: align with parameter in the future + writePayloadSize = 8 + ) + ).suggestName("axi4_channel1_loadStoreAXI") + ) + loadStoreAgent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> loadStoreAXI + } + loadStoreAgent.io.clock := clock + loadStoreAgent.io.reset := reset + loadStoreAgent.io.channelId := 0.U + loadStoreAgent.io.gateRead := false.B + loadStoreAgent.io.gateWrite := false.B +} diff --git 
a/rocketemu/test_common/Cargo.toml b/rocketemu/test_common/Cargo.toml new file mode 100644 index 000000000..d5b3f32aa --- /dev/null +++ b/rocketemu/test_common/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "common" +version.workspace = true +edition = "2021" + +[dependencies] +spike_rs = { path = "../spike_rs" } +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } diff --git a/rocketemu/test_common/src/lib.rs b/rocketemu/test_common/src/lib.rs new file mode 100644 index 000000000..17851612e --- /dev/null +++ b/rocketemu/test_common/src/lib.rs @@ -0,0 +1,64 @@ +use anyhow::Result; +use clap::Parser; +use spike_rs::Spike; +use std::path::PathBuf; +use tracing::Level; +use tracing_subscriber::{EnvFilter, FmtSubscriber}; + +pub mod rtl_config; +pub mod spike_runner; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct CommonArgs { + /// Path to the ELF file + #[arg(long)] + pub elf_file: PathBuf, + + /// Path to the log file + #[arg(long)] + pub log_file: Option<PathBuf>, + + /// Log level: trace, debug, info, warn, error + #[arg(long, default_value = "info")] + pub log_level: String, + + /// vlen config + #[arg(long, default_value = option_env!("DESIGN_VLEN").unwrap_or("0"))] + pub vlen: u32, + + /// dlen config + #[arg(long, default_value = option_env!("DESIGN_DLEN").unwrap_or("0"))] + pub dlen: u32, + + /// ISA config + #[arg(long, default_value = "rv32gcv")] + pub set: String, +} + +pub static MEM_SIZE: usize = 1usize << 32; + +impl CommonArgs { + pub fn to_spike_c_handler(&self) -> Box<Spike> { + let arch = &format!("vlen:{},elen:32", self.vlen); + let lvl = "MSU"; + + Spike::new(arch, &self.set, lvl, (self.dlen / 32) as usize, MEM_SIZE) + } + + pub fn setup_logger(&self) -> Result<()> { + // setup log + let log_level: Level = self.log_level.parse()?; + let global_logger = FmtSubscriber::builder() + .with_env_filter(EnvFilter::from_default_env()) + .with_max_level(log_level) + .without_time() + .with_target(false) + .with_ansi(true) + .compact() + .finish(); + tracing::subscriber::set_global_default(global_logger) + .expect("internal error: fail to setup log subscriber"); + Ok(()) + } +} diff --git a/rocketemu/test_common/src/rtl_config.rs b/rocketemu/test_common/src/rtl_config.rs new file mode 100644 index 000000000..0daf72624 --- /dev/null +++ b/rocketemu/test_common/src/rtl_config.rs @@ -0,0 +1,20 @@ +pub struct RTLConfig { + pub vlen: u32, + pub dlen: u32, +} + +// TODO: read from json + +impl RTLConfig { + pub fn xlen(&self) -> u32 { + 32 // TODO: configurable + } + + pub fn vlen_in_bytes(&self) -> u32 { + self.vlen / 8 + } + + pub fn lane_num(&self) -> u32 { + self.dlen / self.xlen() + } +}
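Aside (not part of the patch): a sketch of how the SpikeRunner defined in the next file is typically driven, mirroring the difftest loop in offline/src/difftest.rs:

fn run_reference(args: &CommonArgs) {
    let mut runner = SpikeRunner::new(args, false);
    loop {
        let se = runner.spike_step(); // commit one instruction on the reference model
        if se.is_exit() {
            break; // set when the guest stores 0xdead_beef to 0x4000_0000
        }
    }
}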
diff --git a/rocketemu/test_common/src/spike_runner.rs b/rocketemu/test_common/src/spike_runner.rs new file mode 100644 index 000000000..24e0a2e29 --- /dev/null +++ b/rocketemu/test_common/src/spike_runner.rs @@ -0,0 +1,97 @@ +use std::path::Path; +use tracing::debug; + +use spike_rs::spike_event::SpikeEvent; +use spike_rs::util::load_elf; +use spike_rs::Spike; + +use crate::CommonArgs; + +pub struct SpikeRunner { + spike: Box<Spike>, + + /// config for v extension + pub vlen: u32, + pub dlen: u32, + + /// RTL cycle count; serves as get_t() for the mcycle CSR update + pub cycle: u64, + + /// spike-side cycle count, also used for the mcycle CSR update + pub spike_cycle: u64, + + pub do_log_vrf: bool, +} + +impl SpikeRunner { + pub fn new(args: &CommonArgs, do_log_vrf: bool) -> Self { + // load the elf file + // initialize spike + let mut spike = args.to_spike_c_handler(); + + let entry_addr = load_elf(&mut spike, Path::new(&args.elf_file)).unwrap(); + + // initialize processor + let proc = spike.get_proc(); + let state = proc.get_state(); + proc.reset(); + state.set_pc(entry_addr); + + SpikeRunner { + spike, + vlen: args.vlen, + dlen: args.dlen, + cycle: 0, + spike_cycle: 0, + do_log_vrf, + } + } + + pub fn load_elf(&mut self, fname: &Path) -> anyhow::Result<u64> { + load_elf(&mut *self.spike, fname) + } + + // just execute one instruction for non-difftest + pub fn exec(&self) -> anyhow::Result<()> { + let spike = &self.spike; + let proc = spike.get_proc(); + let state = proc.get_state(); + + let new_pc = proc.func(); + + state.handle_pc(new_pc).unwrap(); + + let ret = state.exit(); + + if ret == 0 { + return Err(anyhow::anyhow!("simulation finished!")); + } + + Ok(()) + } + + // execute the spike processor for one instruction and record + // the spike event for difftest + pub fn spike_step(&mut self) -> SpikeEvent { + let spike = &self.spike; + let proc = self.spike.get_proc(); + let state = proc.get_state(); + + state.set_mcycle((self.cycle + self.spike_cycle) as usize); + + let mut event = SpikeEvent::new(spike, self.do_log_vrf); + state.clear(); + + // inst is scalar + debug!("SpikeStep: spike run scalar insn ({})", event.describe_insn()); + let new_pc = proc.func(); + event.log_mem_write(spike).unwrap(); + event.log_reg_write(spike).unwrap(); + + state.handle_pc(new_pc).unwrap(); + + self.spike_cycle += 1; + + event + } +} diff --git a/rocketv/configs/meowth.json b/rocketv/configs/meowth.json new file mode 100644 index 000000000..12ddcfa36 --- /dev/null +++ b/rocketv/configs/meowth.json @@ -0,0 +1,70 @@ +{ + "parameter": { + "useAsyncReset": false, + "clockGate": true, + "instructionSets": ["rv32_i", "rv_f"], + "priv": "m", + "hartIdLen": 4, + "useBPWatch": false, + "mcontextWidth": 0, + "scontextWidth": 0, + "asidBits": 0, + "resetVectorBits": 32, + "nBreakpoints": 0, + "dtlbNWays": 32, + "dtlbNSets": 64, + "itlbNSets": 64, + "itlbNWays": 32, + "itlbNSectors": 4, + "itlbNSuperpageEntries": 4, + "nPTECacheEntries": 0, + "nL2TLBWays": 1, + "nL2TLBEntries": 0, + "paddrBits": 32, + "cacheBlockBytes": 32, + "nPMPs": 8, + "legal": "b????????????????????????????????", + "cacheable": "b1???????????????????????????????", + "read": "b????????????????????????????????", + "write": "b????????????????????????????????", + "putPartial": "b????????????????????????????????", + "logic": "b0", + "arithmetic": "b0", + "exec": "b1???????????????????????????????", + "sideEffects": "b00??????????????????????????????", + "btbEntries": 28, + "btbNMatchBits": 14, + "btbUpdatesOutOfOrder": false, + "nPages": 6, + "nRAS": 6, + "bhtParameter": [ + { + "nEntries": 512, + "counterLength": 1, + "historyLength": 8, + "historyBits": 3 + } + ], + "mulDivLatency": 2, + "divUnroll": 1, + "divEarlyOut": false, + "divEarlyOutGranularity": 0, + "mulUnroll": 1, + "mulEarlyOut": false, + "sfmaLatency": 3, + "dfmaLatency": 3, + "divSqrt": true, + "flushOnFenceI": true, + "fastLoadByte": false, + "fastLoadWord": false, + "dcacheNSets": 64, + "dcacheNWays": 4, + "dcacheRowBits": 32, + "maxUncachedInFlight": 1, + "separateUncachedResp": false, + "iCacheNSets": 32, + "iCacheNWays": 4, + "iCachePrefetch": false + }, + "generator": "org.chipsalliance.rocketv.RocketTile" +} diff --git a/rocketv/src/ALU.scala b/rocketv/src/ALU.scala new file mode 100644 index 000000000..ecf34d50a --- /dev/null +++ b/rocketv/src/ALU.scala @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, Fill, Reverse} + +object ALUParameter { + implicit def rwP: upickle.default.ReadWriter[ALUParameter] = upickle.default.macroRW[ALUParameter] +} + +case class ALUParameter(xLen: Int) extends SerializableModuleParameter { + val uopSize: Int = 4 + // static to false for now + val usingConditionalZero = false + + // TODO:move these to decoder. + val FN_ADD = 0.U + val FN_SL = 1.U + val FN_SEQ = 2.U + val FN_SNE = 3.U + val FN_XOR = 4.U + val FN_SR = 5.U + val FN_OR = 6.U + val FN_AND = 7.U + val FN_CZEQZ = 8.U + val FN_CZNEZ = 9.U + val FN_SUB = 10.U + val FN_SRA = 11.U + val FN_SLT = 12.U + + def isSub(cmd: UInt) = cmd(3) + def isCmp(cmd: UInt) = cmd >= FN_SLT + def cmpUnsigned(cmd: UInt) = cmd(1) + def cmpInverted(cmd: UInt) = cmd(0) + def cmpEq(cmd: UInt) = !cmd(3) + + def DW_32 = false.B + def DW_64 = true.B +} + +class ALUInterface(parameter: ALUParameter) extends Bundle { + val dw = Input(UInt(1.W)) + val fn = Input(UInt(parameter.uopSize.W)) + val in2 = Input(UInt(parameter.xLen.W)) + val in1 = Input(UInt(parameter.xLen.W)) + val out = Output(UInt(parameter.xLen.W)) + val adder_out = Output(UInt(parameter.xLen.W)) + val cmp_out = Output(Bool()) +} + +@instantiable +class ALU(val parameter: ALUParameter) + extends FixedIORawModule(new ALUInterface(parameter)) + with SerializableModule[ALUParameter] { + // compatibility layer + val aluFn = parameter + val xLen = parameter.xLen + val DW_64 = parameter.DW_64 + val usingConditionalZero = parameter.usingConditionalZero + val DW_32 = parameter.DW_32 + + + // Original implementation + + // ADD, SUB + val in2_inv = Mux(aluFn.isSub(io.fn), ~io.in2, io.in2) + val in1_xor_in2 = io.in1 ^ in2_inv + io.adder_out := io.in1 + in2_inv + aluFn.isSub(io.fn) + + // SLT, SLTU + val slt = + Mux( + io.in1(xLen - 1) === io.in2(xLen - 1), + io.adder_out(xLen - 1), + Mux(aluFn.cmpUnsigned(io.fn), io.in2(xLen - 1), io.in1(xLen - 1)) + ) + io.cmp_out := aluFn.cmpInverted(io.fn) ^ Mux(aluFn.cmpEq(io.fn), in1_xor_in2 === 0.U, slt) + + // SLL, SRL, SRA + val (shamt, shin_r) = + if (xLen == 32) (io.in2(4, 0), io.in1) + else { + require(xLen == 64) + val shin_hi_32 = Fill(32, aluFn.isSub(io.fn) && io.in1(31)) + val shin_hi = Mux(io.dw === DW_64, io.in1(63, 32), shin_hi_32) + val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4, 0)) + (shamt, Cat(shin_hi, io.in1(31, 0))) + } + val shin = Mux(io.fn === aluFn.FN_SR || io.fn === aluFn.FN_SRA, shin_r, Reverse(shin_r)) + val shout_r = (Cat(aluFn.isSub(io.fn) & shin(xLen - 1), shin).asSInt >> shamt)(xLen - 1, 0) + val shout_l = Reverse(shout_r) + val shout = Mux(io.fn === aluFn.FN_SR || io.fn === aluFn.FN_SRA, shout_r, 0.U) | + Mux(io.fn === aluFn.FN_SL, shout_l, 0.U) + + // CZEQZ, CZNEZ + val in2_not_zero = io.in2.orR + val cond_out = Option.when(usingConditionalZero)( + Mux((io.fn === aluFn.FN_CZEQZ && in2_not_zero) || (io.fn === aluFn.FN_CZNEZ && !in2_not_zero), io.in1, 0.U) + ) + + // AND, OR, XOR + val logic = Mux(io.fn === aluFn.FN_XOR || io.fn === aluFn.FN_OR, in1_xor_in2, 0.U) | + Mux(io.fn === aluFn.FN_OR || io.fn === aluFn.FN_AND, io.in1 & io.in2, 0.U) + + val shift_logic = (aluFn.isCmp(io.fn) && slt) | logic | shout + val 
shift_logic_cond = cond_out match { + case Some(co) => shift_logic | co + case _ => shift_logic + } + val out = Mux(io.fn === aluFn.FN_ADD || io.fn === aluFn.FN_SUB, io.adder_out, shift_logic_cond) + + io.out := out + if (xLen > 32) { + require(xLen == 64) + when(io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31, 0)) } + } +} diff --git a/rocketv/src/AMOALU.scala b/rocketv/src/AMOALU.scala new file mode 100644 index 000000000..84e9ec5a8 --- /dev/null +++ b/rocketv/src/AMOALU.scala @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{FillInterleaved, PriorityMux, log2Ceil} + +object AMOALUParameter { + implicit def rwP: upickle.default.ReadWriter[AMOALUParameter] = upickle.default.macroRW[AMOALUParameter] +} + +case class AMOALUParameter(operandBits: Int) extends SerializableModuleParameter { + val uopSize: Int = 4 + def M_XA_ADD = "b01000".U + def M_XA_XOR = "b01001".U + def M_XA_OR = "b01010".U + def M_XA_AND = "b01011".U + def M_XA_MIN = "b01100".U + def M_XA_MAX = "b01101".U + def M_XA_MINU = "b01110".U + def M_XA_MAXU = "b01111".U +} + +class AMOALUInterface(parameter: AMOALUParameter) extends Bundle { + val mask = Input(UInt((parameter.operandBits / 8).W)) + val cmd = Input(UInt(parameter.uopSize.W)) + val lhs = Input(UInt(parameter.operandBits.W)) + val rhs = Input(UInt(parameter.operandBits.W)) + val out = Output(UInt(parameter.operandBits.W)) + val out_unmasked = Output(UInt(parameter.operandBits.W)) +} + +@instantiable +class AMOALU(val parameter: AMOALUParameter) + extends FixedIORawModule(new AMOALUInterface(parameter)) + with SerializableModule[AMOALUParameter] { + val M_XA_MAX = parameter.M_XA_MAX + val M_XA_MAXU = parameter.M_XA_MAXU + val M_XA_MIN = parameter.M_XA_MIN + val M_XA_MINU = parameter.M_XA_MINU + val M_XA_ADD = parameter.M_XA_ADD + val M_XA_OR = parameter.M_XA_OR + val M_XA_AND = parameter.M_XA_AND + val M_XA_XOR = parameter.M_XA_XOR + val operandBits = parameter.operandBits + val minXLen = 32 + val widths = (0 to log2Ceil(operandBits / minXLen)).map(minXLen << _) + + // Original implementation + + val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU + val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU + val add = io.cmd === M_XA_ADD + val logic_and = io.cmd === M_XA_OR || io.cmd === M_XA_AND + val logic_xor = io.cmd === M_XA_XOR || io.cmd === M_XA_OR + + val adder_out = { + // partition the carry chain to support sub-xLen addition + val mask = ~(0.U(operandBits.W) +: widths.init.map(w => !io.mask(w / 8 - 1) << (w - 1))).reduce(_ | _) + (io.lhs & mask) + (io.rhs & mask) + } + + val less = { + // break up the comparator so the lower parts will be CSE'd + def isLessUnsigned(x: UInt, y: UInt, n: Int): Bool = { + if (n == minXLen) x(n - 1, 0) < y(n - 1, 0) + else x(n - 1, n / 2) < y(n - 1, n / 2) || x(n - 1, n / 2) === y(n - 1, n / 2) && isLessUnsigned(x, y, n / 2) + } + + def isLess(x: UInt, y: UInt, n: Int): Bool = { + val signed = { + val mask = M_XA_MIN ^ M_XA_MINU + (io.cmd & mask) === (M_XA_MIN & mask) + } + Mux(x(n - 1) === y(n - 1), isLessUnsigned(x, y, n), Mux(signed, x(n - 1), y(n - 1))) + } + + PriorityMux(widths.reverse.map(w => (io.mask(w / 8 / 2), 
isLess(io.lhs, io.rhs, w)))) + } + + val minmax = Mux(Mux(less, min, max), io.lhs, io.rhs) + val logic = + Mux(logic_and, io.lhs & io.rhs, 0.U) | + Mux(logic_xor, io.lhs ^ io.rhs, 0.U) + val out = + Mux(add, adder_out, Mux(logic_and || logic_xor, logic, minmax)) + + val wmask = FillInterleaved(8, io.mask) + io.out := wmask & out | ~wmask & io.lhs + io.out_unmasked := out +} diff --git a/rocketv/src/BTB.scala b/rocketv/src/BTB.scala new file mode 100644 index 000000000..7a7d85f29 --- /dev/null +++ b/rocketv/src/BTB.scala @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +object BHTParameter { + implicit def rwP: upickle.default.ReadWriter[BHTParameter] = upickle.default.macroRW[BHTParameter] +} + +case class BHTParameter(nEntries: Int, counterLength: Int, historyLength: Int, historyBits: Int) + +object BTBParameter { + implicit def rwP: upickle.default.ReadWriter[BTBParameter] = upickle.default.macroRW[BTBParameter] +} + +case class BTBParameter( + useAsyncReset: Boolean, + fetchBytes: Int, + vaddrBits: Int, + entries: Int, + nMatchBits: Int, + nPages: Int, + nRAS: Int, + cacheBlockBytes: Int, + iCacheSet: Int, + useCompressed: Boolean, + updatesOutOfOrder: Boolean, + fetchWidth: Int, + // below is for the BHT; note that the BHT is not actually a separate module :( + bhtParameter: Option[BHTParameter]) + extends SerializableModuleParameter { + val nEntries: Int = entries +} + +class BTBInterface(parameter: BTBParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val req = Flipped(Valid(new BTBReq(parameter.vaddrBits))) + val resp = Valid( + new BTBResp( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength) + ) + ) + val btb_update = Flipped( + Valid( + new BTBUpdate( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength) + ) + ) + ) + val bht_update = Flipped( + Valid( + new BHTUpdate( + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength), + parameter.vaddrBits + ) + ) + ) + val bht_advance = Flipped( + Valid( + new BTBResp( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength) + ) + ) + ) + val ras_update = Flipped(Valid(new RASUpdate(parameter.vaddrBits))) + val ras_head = Valid(UInt(parameter.vaddrBits.W)) + val flush = Input(Bool()) +} + +@instantiable +class BTB(val parameter: BTBParameter) + extends FixedIORawModule(new BTBInterface(parameter)) + with SerializableModule[BTBParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + // compatibility layer + val entries = parameter.entries + val nMatchBits = parameter.nMatchBits + val matchBits = parameter.nMatchBits.max(log2Ceil(parameter.cacheBlockBytes * parameter.iCacheSet)) + val coreInstBytes = (if
(parameter.useCompressed) 16 else 32) / 8 + val nPages = (parameter.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages + val vaddrBits = parameter.vaddrBits + val fetchWidth = parameter.fetchWidth + val updatesOutOfOrder = parameter.updatesOutOfOrder + // original implementation. + + val idxs = Reg(Vec(entries, UInt((matchBits - log2Up(coreInstBytes)).W))) + val idxPages = Reg(Vec(entries, UInt(log2Up(nPages).W))) + val tgts = Reg(Vec(entries, UInt((matchBits - log2Up(coreInstBytes)).W))) + val tgtPages = Reg(Vec(entries, UInt(log2Up(nPages).W))) + val pages = Reg(Vec(nPages, UInt((vaddrBits - matchBits).W))) + val pageValid = RegInit(0.U(nPages.W)) + val pagesMasked = (pageValid.asBools.zip(pages)).map { case (v, p) => Mux(v, p, 0.U) } + + val isValid = RegInit(0.U(entries.W)) + val cfiType = Reg(Vec(entries, UInt(CFIType.width.W))) + val brIdx = Reg(Vec(entries, UInt(log2Up(fetchWidth).W))) + + private def page(addr: UInt) = addr >> matchBits + private def pageMatch(addr: UInt) = { + val p = page(addr) + pageValid & VecInit(pages.map(_ === p)).asUInt + } + private def idxMatch(addr: UInt) = { + val idx = addr(matchBits - 1, log2Up(coreInstBytes)) + VecInit(idxs.map(_ === idx)).asUInt & isValid + } + + val r_btb_update = Pipe(io.btb_update) + val update_target = io.req.bits.addr + + val pageHit = pageMatch(io.req.bits.addr) + val idxHit = idxMatch(io.req.bits.addr) + + val updatePageHit = pageMatch(r_btb_update.bits.pc) + val (updateHit, updateHitAddr) = + if (updatesOutOfOrder) { + val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages)) + (updateHits.orR, OHToUInt(updateHits)) + } else (r_btb_update.bits.prediction.entry < entries.U, r_btb_update.bits.prediction.entry) + + val useUpdatePageHit = updatePageHit.orR + val usePageHit = pageHit.orR + val doIdxPageRepl = !useUpdatePageHit + val nextPageRepl = RegInit(0.U(log2Ceil(nPages).W)) + val idxPageRepl = Cat(pageHit(nPages - 2, 0), pageHit(nPages - 1)) | Mux(usePageHit, 0.U, UIntToOH(nextPageRepl)) + val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) + val idxPageUpdate = OHToUInt(idxPageUpdateOH) + val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, 0.U) + + val samePage = page(r_btb_update.bits.pc) === page(update_target) + val doTgtPageRepl = !samePage && !usePageHit + val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages - 2, 0), idxPageUpdateOH(nPages - 1))) + val tgtPageUpdate = OHToUInt(pageHit | Mux(usePageHit, 0.U, tgtPageRepl)) + val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, 0.U) + + when(r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) { + val both = doIdxPageRepl && doTgtPageRepl + val next = nextPageRepl + Mux[UInt](both, 2.U, 1.U) + nextPageRepl := Mux(next >= nPages.U, next(0), next) + } + + val repl = new PseudoLRU(entries) + val waddr = Mux(updateHit, updateHitAddr, repl.way) + val r_resp = Pipe(io.resp) + when(r_resp.valid && r_resp.bits.taken || r_btb_update.valid) { + repl.access(Mux(r_btb_update.valid, waddr, r_resp.bits.entry)) + } + + when(r_btb_update.valid) { + val mask = UIntToOH(waddr) + idxs(waddr) := r_btb_update.bits.pc(matchBits - 1, log2Up(coreInstBytes)) + tgts(waddr) := update_target(matchBits - 1, log2Up(coreInstBytes)) + idxPages(waddr) := idxPageUpdate +& 1.U // the +1 corresponds to the <<1 on io.resp.valid + tgtPages(waddr) := tgtPageUpdate + cfiType(waddr) := r_btb_update.bits.cfiType + isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask) + if (fetchWidth > 1) + brIdx(waddr) := 
r_btb_update.bits.br_pc >> log2Up(coreInstBytes) + + require(nPages % 2 == 0) + val idxWritesEven = !idxPageUpdate(0) + + def writeBank(i: Int, mod: Int, en: UInt, data: UInt) = + for (i <- i until nPages by mod) + when(en(i)) { pages(i) := data } + + writeBank( + 0, + 2, + Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn), + Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)) + ) + writeBank( + 1, + 2, + Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn), + Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)) + ) + pageValid := pageValid | tgtPageReplEn | idxPageReplEn + } + + io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages)) + io.resp.bits.taken := true.B + io.resp.bits.target := Cat( + VecInit(pagesMasked)(Mux1H(idxHit, tgtPages)), + Mux1H(idxHit, tgts) << log2Up(coreInstBytes) + ) + io.resp.bits.entry := OHToUInt(idxHit) + io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else 0.U) + io.resp.bits.mask := Cat((1.U << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, 0.U)) - 1.U, 1.U) + io.resp.bits.cfiType := Mux1H(idxHit, cfiType) + + // if multiple entries for same PC land in BTB, zap them + when(PopCountAtLeast(idxHit, 2)) { + isValid := isValid & ~idxHit + } + when(io.flush) { + isValid := 0.U + } + + parameter.bhtParameter.foreach { bhtParameter => + /** BHT contains table of 2-bit counters and a global history register. + * The BHT only predicts and updates when there is a BTB hit. + * The global history: + * - updated speculatively in fetch (if there's a BTB hit). + * - on a mispredict, the history register is reset (again, only if BTB hit). + * The counter table: + * - each counter corresponds with the address of the fetch packet ("fetch pc"). + * - updated when a branch resolves (and BTB was a hit for that branch). + * The updating branch must provide its "fetch pc". + */ + class BHT { + def index(addr: UInt, history: UInt) = { + def hashHistory(hist: UInt) = if (bhtParameter.historyLength == bhtParameter.historyBits) hist + else { + val k = math.sqrt(3) / 2 + val i = BigDecimal(k * math.pow(2, bhtParameter.historyLength)).toBigInt + (i.U * hist)(bhtParameter.historyLength - 1, bhtParameter.historyLength - bhtParameter.historyBits) + } + def hashAddr(addr: UInt) = { + val hi = addr >> log2Ceil(parameter.fetchBytes) + hi(log2Ceil(bhtParameter.nEntries) - 1, 0) ^ (hi >> log2Ceil(bhtParameter.nEntries))(1, 0) + } + hashAddr(addr) ^ (hashHistory(history) << (log2Up(bhtParameter.nEntries) - bhtParameter.historyBits)) + } + def get(addr: UInt): BHTResp = { + val res = Wire(new BHTResp(Some(bhtParameter.historyLength), Some(bhtParameter.counterLength))) + res.value := Mux(resetting, 0.U, table(index(addr, history))) + res.history := history + res + } + def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = { + wen := true.B + when(!resetting) { + waddr := index(addr, d.history) + wdata := (bhtParameter.counterLength match { + case 1 => taken + case 2 => Cat(taken ^ d.value(0), d.value === 1.U || d.value(1) && taken) + }) + } + } + def resetHistory(d: BHTResp): Unit = { + history := d.history + } + def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = { + history := Cat(taken, d.history >> 1) + } + def advanceHistory(taken: Bool): Unit = { + history := Cat(taken, history >> 1) + } + + // todo: make sure if this is SRAM, then change it to SRAM. 
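+      // Worked example of `index` above, with hypothetical parameters (nEntries = 512, historyLength = 8, historyBits = 3): + //      hashHistory folds the 8 history bits down to 3 through a multiplicative hash, i = (sqrt(3)/2 * 2^8).toBigInt = 221, + //      keeping the top historyBits of the product; hashAddr folds the fetch-packet address into log2Ceil(512) = 9 bits; + //      the history hash is then XORed into the top 3 bits of the 9-bit table index: + //      {{{ + //        def hashHistory(hist: UInt): UInt = (221.U * hist)(7, 5) + //        def hashAddr(addr: UInt): UInt = { + //          val hi = addr >> log2Ceil(parameter.fetchBytes) + //          hi(8, 0) ^ (hi >> 9)(1, 0) + //        } + //        val idx = hashAddr(addr) ^ (hashHistory(history) << (9 - 3)) + //      }}}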
+ private val table = Mem(bhtParameter.nEntries, UInt(bhtParameter.counterLength.W)) + val history = RegInit(0.U(bhtParameter.historyLength.W)) + + private val reset_waddr = RegInit(0.U((log2Ceil(bhtParameter.nEntries) + 1).W)) + private val resetting = !reset_waddr(log2Ceil(bhtParameter.nEntries)) + private val wen = WireInit(resetting) + private val waddr = WireInit(reset_waddr) + private val wdata = WireInit(0.U) + when(resetting) { reset_waddr := reset_waddr + 1.U } + when(wen) { table(waddr) := wdata } + } + val bht = new BHT + val isBranch = (idxHit & VecInit(cfiType.map(_ === CFIType.branch)).asUInt).orR + val res = bht.get(io.req.bits.addr) + when(io.bht_advance.valid) { + bht.advanceHistory(BHTResp.taken(io.bht_advance.bits.bht)) + } + when(io.bht_update.valid) { + when(io.bht_update.bits.branch) { + bht.updateTable(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) + when(io.bht_update.bits.mispredict) { + bht.updateHistory(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) + } + }.elsewhen(io.bht_update.bits.mispredict) { + bht.resetHistory(io.bht_update.bits.prediction) + } + } + when(!BHTResp.taken(res) && isBranch) { io.resp.bits.taken := false.B } + io.resp.bits.bht := res + } + + if (parameter.nRAS > 0) { + class RAS { + def push(addr: UInt): Unit = { + when(count < parameter.nRAS.U) { count := count + 1.U } + val nextPos = Mux(isPow2(parameter.nRAS).B || pos < (parameter.nRAS - 1).U, pos + 1.U, 0.U) + stack(nextPos) := addr + pos := nextPos + } + def peek: UInt = stack(pos) + def pop(): Unit = when(!isEmpty) { + count := count - 1.U + pos := Mux((isPow2(parameter.nRAS)).B || pos > 0.U, pos - 1.U, (parameter.nRAS - 1).U) + } + def clear(): Unit = count := 0.U + def isEmpty: Bool = count === 0.U + + private val count = RegInit(0.U(log2Up(parameter.nRAS + 1).W)) + private val pos = RegInit(0.U(log2Up(parameter.nRAS).W)) + private val stack = Reg(Vec(parameter.nRAS, UInt())) + } + val ras = new RAS + val doPeek = (idxHit & VecInit(cfiType.map(_ === CFIType.ret)).asUInt).orR + io.ras_head.valid := !ras.isEmpty + io.ras_head.bits := ras.peek + when(!ras.isEmpty && doPeek) { + io.resp.bits.target := ras.peek + } + when(io.ras_update.valid) { + when(io.ras_update.bits.cfiType === CFIType.call) { + ras.push(io.ras_update.bits.returnAddr) + }.elsewhen(io.ras_update.bits.cfiType === CFIType.ret) { + ras.pop() + } + } + } +} diff --git a/rocketv/src/Breakpoint.scala b/rocketv/src/Breakpoint.scala new file mode 100644 index 000000000..e55a28d64 --- /dev/null +++ b/rocketv/src/Breakpoint.scala @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} + +object BreakpointUnitParameter { + implicit def rwP: upickle.default.ReadWriter[BreakpointUnitParameter] = upickle.default.macroRW[BreakpointUnitParameter] +} + +case class BreakpointUnitParameter(nBreakpoints: Int, xLen: Int, useBPWatch: Boolean, vaddrBits: Int, mcontextWidth: Int, scontextWidth: Int) extends SerializableModuleParameter + +class BreakpointUnitInterface(parameter: BreakpointUnitParameter) extends Bundle { + val status = Input(new MStatus) + val bp = Input(Vec(parameter.nBreakpoints, new BP(parameter.xLen, parameter.useBPWatch, parameter.vaddrBits, parameter.mcontextWidth, 
parameter.scontextWidth))) + val pc = Input(UInt(parameter.vaddrBits.W)) + val ea = Input(UInt(parameter.vaddrBits.W)) + val mcontext = Input(UInt(parameter.mcontextWidth.W)) + val scontext = Input(UInt(parameter.scontextWidth.W)) + val xcpt_if = Output(Bool()) + val xcpt_ld = Output(Bool()) + val xcpt_st = Output(Bool()) + val debug_if = Output(Bool()) + val debug_ld = Output(Bool()) + val debug_st = Output(Bool()) + val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch)) +} + +@instantiable +class BreakpointUnit(val parameter: BreakpointUnitParameter) + extends FixedIORawModule(new BreakpointUnitInterface(parameter)) + with SerializableModule[BreakpointUnitParameter] { + io.xcpt_if := false.B + io.xcpt_ld := false.B + io.xcpt_st := false.B + io.debug_if := false.B + io.debug_ld := false.B + io.debug_st := false.B + + (io.bpwatch.zip(io.bp)).foldLeft((true.B, true.B, true.B)) { + case ((ri, wi, xi), (bpw, bp)) => + val en = BPControl.enabled(bp.control, io.status) + val cx = BP.contextMatch(bp, io.mcontext, io.scontext, parameter.xLen, parameter.mcontextWidth, parameter.scontextWidth) + val r = en && bp.control.r && BP.addressMatch(bp, io.ea) && cx + val w = en && bp.control.w && BP.addressMatch(bp, io.ea) && cx + val x = en && bp.control.x && BP.addressMatch(bp, io.pc) && cx + val end = !bp.control.chain + val action = bp.control.action + + bpw.action := action + bpw.valid := false.B + bpw.rvalid := false.B + bpw.wvalid := false.B + bpw.ivalid := false.B + + when(end && r && ri) { + io.xcpt_ld := (action === 0.U); io.debug_ld := (action === 1.U); bpw.valid := true.B; bpw.rvalid := true.B + } + when(end && w && wi) { + io.xcpt_st := (action === 0.U); io.debug_st := (action === 1.U); bpw.valid := true.B; bpw.wvalid := true.B + } + when(end && x && xi) { + io.xcpt_if := (action === 0.U); io.debug_if := (action === 1.U); bpw.valid := true.B; bpw.ivalid := true.B + } + + (end || r, end || w, end || x) + } +} diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala new file mode 100644 index 000000000..31bc2ba16 --- /dev/null +++ b/rocketv/src/Bundle.scala @@ -0,0 +1,1450 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, ValidIO, isPow2, log2Ceil} + +// This file defines the Bundles shared across the project. +// Bundles here carry only data fields, with no helpers or functions; those live in the companion objects.
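+// As an example of this convention (a hypothetical Bundle, purely illustrative): +// {{{ +//   class ExampleEntry extends Bundle { +//     val valid = Bool() +//     val data  = UInt(8.W) +//   } +//   object ExampleEntry { +//     def hit(e: ExampleEntry, expected: UInt): Bool = e.valid && e.data === expected +//   } +// }}}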
+ +// TODO: make it Enum +object PRV { + val SZ = 2 + val U = 0 + val S = 1 + val H = 2 + val M = 3 +} + +class MStatus extends Bundle { + // not truly part of mstatus, but convenient + val debug = Bool() + val cease = Bool() + val wfi = Bool() + val isa = UInt(32.W) + + val dprv = UInt(PRV.SZ.W) // effective prv for data accesses + val dv = Bool() // effective v for data accesses + val prv = UInt(PRV.SZ.W) + val v = Bool() + + val sd = Bool() + val zero2 = UInt(23.W) + val mpv = Bool() + val gva = Bool() + val mbe = Bool() + val sbe = Bool() + val sxl = UInt(2.W) + val uxl = UInt(2.W) + val sd_rv32 = Bool() + val zero1 = UInt(8.W) + val tsr = Bool() + val tw = Bool() + val tvm = Bool() + val mxr = Bool() + val sum = Bool() + val mprv = Bool() + val xs = UInt(2.W) + val fs = UInt(2.W) + val mpp = UInt(2.W) + val vs = UInt(2.W) + val spp = UInt(1.W) + val mpie = Bool() + val ube = Bool() + val spie = Bool() + val upie = Bool() + val mie = Bool() + val hie = Bool() + val sie = Bool() + val uie = Bool() +} + +object BP { + def contextMatch(bp: BP, mcontext: UInt, scontext: UInt, xLen: Int, mcontextWidth: Int, scontextWidth: Int): Bool = + (if (mcontextWidth > 0) + !bp.textra.mselect || (mcontext(TExtra.mvalueBits(xLen, mcontextWidth) - 1, 0) === bp.textra.mvalue) + else true.B) && + (if (scontextWidth > 0) + !bp.textra.sselect || (scontext(TExtra.svalueBits(xLen, scontextWidth) - 1, 0) === bp.textra.svalue) + else true.B) + + def addressMatch(bp: BP, x: UInt) = { + def rangeAddressMatch(x: UInt) = + (x >= bp.address) ^ bp.control.tmatch(0) + + def pow2AddressMatch(x: UInt): Bool = { + def mask(): UInt = { + import chisel3.experimental.conversions.seq2vec + def maskMax = 4 + (0 until maskMax - 1).scanLeft(bp.control.tmatch(0))((m, i) => m && bp.address(i)).asUInt + } + (~x | mask()) === (~bp.address | mask()) + } + Mux(bp.control.tmatch(1), rangeAddressMatch(x), pow2AddressMatch(x)) + } +} + +class BP(xLen: Int, useBPWatch: Boolean, vaddrBits: Int, mcontextWidth: Int, scontextWidth: Int) extends Bundle { + val control = new BPControl(xLen, useBPWatch) + val address = UInt(vaddrBits.W) + val textra = new TExtra(xLen, mcontextWidth, scontextWidth) +} + +object BPControl { + def enabled(bpControl: BPControl, mstatus: MStatus): Bool = + !mstatus.debug && Cat(bpControl.m, bpControl.h, bpControl.s, bpControl.u)(mstatus.prv) +} + +class BPControl(xLen: Int, useBPWatch: Boolean) extends Bundle { + val ttype = UInt(4.W) + val dmode = Bool() + val maskmax = UInt(6.W) + val reserved = UInt((xLen - (if (useBPWatch) 26 else 24)).W) + val action = UInt((if (useBPWatch) 3 else 1).W) + val chain = Bool() + val zero = UInt(2.W) + val tmatch = UInt(2.W) + val m = Bool() + val h = Bool() + val s = Bool() + val u = Bool() + val x = Bool() + val w = Bool() + val r = Bool() +} + +object TExtra { + def mvalueBits(xLen: Int, mcontextWidth: Int): Int = if (xLen == 32) mcontextWidth.min(6) else mcontextWidth.min(13) + def svalueBits(xLen: Int, scontextWidth: Int): Int = if (xLen == 32) scontextWidth.min(16) else scontextWidth.min(34) + def mselectPos(xLen: Int): Int = if (xLen == 32) 25 else 50 + def mvaluePos(xLen: Int): Int = mselectPos(xLen) + 1 + def sselectPos: Int = 0 + def svaluePos: Int = 2 +} + +class TExtra(xLen: Int, mcontextWidth: Int, scontextWidth: Int) extends Bundle { + import TExtra._ + val mvalue = UInt(mvalueBits(xLen, mcontextWidth).W) + val mselect = Bool() + val pad2 = UInt((mselectPos(xLen) - svalueBits(xLen, scontextWidth) - 2).W) + val svalue = UInt(svalueBits(xLen, scontextWidth).W) + val pad1 
= UInt(1.W) + val sselect = Bool() +} + +// Originally in RocketChip this Bundle took an (n: Int) parameter for the retire width; +// since Rocket is a single-issue core, we removed it. +class BPWatch extends Bundle() { + val valid = Bool() + val rvalid = Bool() + val wvalid = Bool() + val ivalid = Bool() + val action = UInt(3.W) +} + +class BTBReq(vaddrBits: Int) extends Bundle { + val addr = UInt(vaddrBits.W) +} + +class BTBResp( + vaddrBits: Int, + entries: Int, + fetchWidth: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int]) + extends Bundle { + + val cfiType = UInt(CFIType.width.W) + val taken = Bool() + val mask = UInt(fetchWidth.W) + val bridx = UInt(log2Ceil(fetchWidth).W) + val target = UInt(vaddrBits.W) + val entry = UInt(log2Ceil(entries + 1).W) + // @todo make it optional with bhtHistoryLength and bhtCounterLength + val bht = new BHTResp(bhtHistoryLength, bhtCounterLength) +} + +object BHTResp { + def taken(bht: BHTResp): Bool = bht.value(0) + def strongly_taken(bhtResp: BHTResp): Bool = bhtResp.value === 1.U +} + +class BHTResp(bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int]) extends Bundle { + val history = UInt(bhtHistoryLength.getOrElse(1).W) + val value = UInt(bhtCounterLength.getOrElse(1).W) + + // @todo: change to: + // val history = bhtHistoryLength.map(i => UInt(i.W)) + // val value = bhtCounterLength.map(i => UInt(i.W)) +} + +class BTBUpdate( + vaddrBits: Int, + entries: Int, + fetchWidth: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int]) + extends Bundle { + + val prediction = new BTBResp(vaddrBits, entries, fetchWidth, bhtHistoryLength, bhtCounterLength) + val pc = UInt(vaddrBits.W) + val target = UInt(vaddrBits.W) + val taken = Bool() + val isValid = Bool() + val br_pc = UInt(vaddrBits.W) + val cfiType = UInt(CFIType.width.W) +} + +class BHTUpdate(bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int], vaddrBits: Int) extends Bundle { + val prediction = new BHTResp(bhtHistoryLength, bhtCounterLength) + val pc = UInt(vaddrBits.W) + val branch = Bool() + val taken = Bool() + val mispredict = Bool() +} + +class RASUpdate(vaddrBits: Int) extends Bundle { + val cfiType = UInt(CFIType.width.W) + val returnAddr = UInt(vaddrBits.W) +} + +// TODO: make it Enum +object CFIType { + def width = 2 + def branch = 0.U + def jump = 1.U + def call = 2.U + def ret = 3.U +} + +class CustomCSRIO(xLen: Int) extends Bundle { + val ren = Output(Bool()) // set by CSRFile, indicates an instruction is reading the CSR + val wen = Output(Bool()) // set by CSRFile, indicates an instruction is writing the CSR + val wdata = Output(UInt(xLen.W)) // wdata provided by instruction writing CSR + val value = Output(UInt(xLen.W)) // current value of CSR in CSRFile + + val stall = Input(Bool()) // reads and writes to this CSR should stall (must be bounded) + + val set = Input(Bool()) // set/sdata enables external agents to set the value of this CSR + val sdata = Input(UInt(xLen.W)) +} + +class CustomCSRs(xLen: Int) extends Bundle { + val csrs = Vec(decls.size, new CustomCSRIO(xLen)) + + // Not all cores have these CSRs, but those that do should follow the same + // numbering conventions. So we list them here but default them to None.
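+  // A core that implements them can override the defs below; the decls concatenation takes care of the rest. + // A sketch (hypothetical subclass, not part of this refactor): + // {{{ + //   class MyCustomCSRs(xLen: Int) extends CustomCSRs(xLen) { + //     override def bpmCSR     = Some(CustomCSR(bpmCSRId, mask = BigInt(1), init = Some(BigInt(0)))) + //     override def chickenCSR = Some(CustomCSR(chickenCSRId, mask = (BigInt(1) << 10) - 1, init = Some(BigInt(0)))) + //   } + // }}}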
+ protected def bpmCSRId = 0x7c0 + protected def bpmCSR: Option[CustomCSR] = None + protected def chickenCSRId = 0x7c1 + protected def chickenCSR: Option[CustomCSR] = None + // If you override this, you'll want to concatenate super.decls + def decls: Seq[CustomCSR] = bpmCSR.toSeq ++ chickenCSR + def flushBTB = getOrElse(bpmCSR, _.wen, false.B) + def bpmStatic = getOrElse(bpmCSR, _.value(0), false.B) + def disableDCacheClockGate = getOrElse(chickenCSR, _.value(0), false.B) + def disableICacheClockGate = getOrElse(chickenCSR, _.value(1), false.B) + def disableCoreClockGate = getOrElse(chickenCSR, _.value(2), false.B) + def disableSpeculativeICacheRefill = getOrElse(chickenCSR, _.value(3), false.B) + def suppressCorruptOnGrantData = getOrElse(chickenCSR, _.value(9), false.B) + protected def getByIdOrElse[T](id: Int, f: CustomCSRIO => T, alt: T): T = { + val idx = decls.indexWhere(_.id == id) + if (idx < 0) alt else f(csrs(idx)) + } + + protected def getOrElse[T](csr: Option[CustomCSR], f: CustomCSRIO => T, alt: T): T = + csr.map(c => getByIdOrElse(c.id, f, alt)).getOrElse(alt) +} + +class TileInterrupts(usingSupervisor: Boolean, nLocalInterrupts: Int, usingNMI: Boolean, resetVectorLen: Int) extends Bundle { + val debug: Bool = Bool() + val mtip: Bool = Bool() + val msip: Bool = Bool() + val meip: Bool = Bool() + val seip: Option[Bool] = Option.when(usingSupervisor)(Bool()) + val lip: Vec[Bool] = Vec(nLocalInterrupts, Bool()) + val nmi = Option.when(usingNMI)(new NMI(resetVectorLen)) +} + +class NMI(w: Int) extends Bundle { + val rnmi = Bool() + val rnmi_interrupt_vector = UInt(w.W) + val rnmi_exception_vector = UInt(w.W) +} + +class CoreInterrupts(usingSupervisor: Boolean, nLocalInterrupts: Int, hasBeu: Boolean, usingNMI: Boolean, resetVectorLen: Int) extends Bundle { + val tileInterrupts = new TileInterrupts(usingSupervisor, nLocalInterrupts, usingNMI, resetVectorLen) + val buserror = Option.when(hasBeu)(Bool()) +} + +class HStatus extends Bundle { + val zero6 = UInt(30.W) + val vsxl = UInt(2.W) + val zero5 = UInt(9.W) + val vtsr = Bool() + val vtw = Bool() + val vtvm = Bool() + val zero3 = UInt(2.W) + val vgein = UInt(6.W) + val zero2 = UInt(2.W) + val hu = Bool() + val spvp = Bool() + val spv = Bool() + val gva = Bool() + val vsbe = Bool() + val zero1 = UInt(5.W) +} + +class CSRDecodeIO(iLen: Int) extends Bundle { + val inst = Input(UInt(iLen.W)) + val fpIllegal = Output(Bool()) + val fpCsr = Output(Bool()) + val readIllegal = Output(Bool()) + val writeIllegal = Output(Bool()) + val writeFlush = Output(Bool()) + val systemIllegal = Output(Bool()) + val virtualAccessIllegal = Output(Bool()) + val virtualSystemIllegal = Output(Bool()) +} + +object PTBR { + def additionalPgLevels(ptbr: PTBR, pgLevels: Int, minPgLevels: Int) = ptbr.mode(log2Ceil(pgLevels - minPgLevels + 1) - 1, 0) + def modeBits(xLen: Int) = xLen match { + case 32 => 1 + case 64 => 4 + } + def maxASIdBits(xLen: Int) = xLen match { + case 32 => 9 + case 64 => 16 + } +} + +class PTBR(xLen: Int, maxPAddrBits: Int, pgIdxBits: Int) extends Bundle { + val mode: UInt = UInt(PTBR.modeBits(xLen).W) + val asid = UInt(PTBR.maxASIdBits(xLen).W) + val ppn = UInt((maxPAddrBits - pgIdxBits).W) +} + +// TODO: remove me. 
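+// (FPConstants mirrors the RISC-V F extension field widths: frm is a 3-bit rounding-mode field and +// fflags is a 5-bit accrued-exception-flags field; presumably kept here until the FPU bundles below +// stop depending on it.)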
+object FPConstants { + val RM_SZ = 3 + val FLAGS_SZ = 5 +} + + +object PMP { + def lgAlign = 2 + private def UIntToOH1(x: UInt, width: Int): UInt = ~((-1).S(width.W).asUInt << x)(width - 1, 0) + + // For PMPReg + def reset(pmp: PMP): Unit = { + pmp.cfg.a := 0.U + pmp.cfg.l := 0.U + } + def readAddr(pmp: PMP, pmpGranularity: Int) = + if (log2Ceil(pmpGranularity) == PMP.lgAlign) + pmp.addr + else { + val mask = ((BigInt(1) << (log2Ceil(pmpGranularity) - PMP.lgAlign)) - 1).U + Mux(napot(pmp), pmp.addr | (mask >> 1), ~(~pmp.addr | mask)) + } + def napot(pmp: PMP) = pmp.cfg.a(1) + def napot(pmp: PMPReg) = pmp.cfg.a(1) + def torNotNAPOT(pmp: PMP) = pmp.cfg.a(0) + def tor(pmp: PMP) = !napot(pmp) && torNotNAPOT(pmp) + def cfgLocked(pmp: PMP) = pmp.cfg.l + def addrLocked(pmp: PMP, next: PMP) = cfgLocked(pmp) || cfgLocked(next) && tor(next) + // PMP + def computeMask(pmp: PMP, pmpGranularity: Int): UInt = { + val base = Cat(pmp.addr, pmp.cfg.a(0)) | ((pmpGranularity - 1).U >> lgAlign) + Cat(base & ~(base + 1.U), ((1 << lgAlign) - 1).U) + } + private def comparand(pmp: PMP, pmpGranularity: Int): UInt = ~(~(pmp.addr << lgAlign) | (pmpGranularity - 1).U) + + private def pow2Match(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = { + def eval(a: UInt, b: UInt, m: UInt) = ((a ^ b) & ~m) === 0.U + if (lgMaxSize <= log2Ceil(pmpGranularity)) { + eval(x, comparand(pmp, pmpGranularity), pmp.mask) + } else { + // break up the circuit; the MSB part will be CSE'd + val lsbMask = pmp.mask | UIntToOH1(lgSize, lgMaxSize) + val msbMatch: Bool = eval(x >> lgMaxSize, comparand(pmp, pmpGranularity) >> lgMaxSize, pmp.mask >> lgMaxSize) + val lsbMatch: Bool = eval(x(lgMaxSize - 1, 0), comparand(pmp, pmpGranularity)(lgMaxSize - 1, 0), lsbMask(lgMaxSize - 1, 0)) + msbMatch && lsbMatch + } + } + + private def boundMatch(pmp: PMP, x: UInt, lsbMask: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = { + if (lgMaxSize <= log2Ceil(pmpGranularity)) { + x < comparand(pmp, pmpGranularity) + } else { + // break up the circuit; the MSB part will be CSE'd + val msbsLess: Bool = (x >> lgMaxSize) < (comparand(pmp, pmpGranularity) >> lgMaxSize) + val msbsEqual: Bool = ((x >> lgMaxSize) ^ (comparand(pmp, pmpGranularity) >> lgMaxSize)) === 0.U + val lsbsLess: Bool = (x(lgMaxSize - 1, 0) | lsbMask) < comparand(pmp, pmpGranularity)(lgMaxSize - 1, 0) + msbsLess || (msbsEqual && lsbsLess) + } + } + + private def lowerBoundMatch(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = + !boundMatch(pmp: PMP, x, UIntToOH1(lgSize, lgMaxSize), lgMaxSize, pmpGranularity: Int) + + private def upperBoundMatch(pmp: PMP, x: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = + boundMatch(pmp, x, 0.U, lgMaxSize, pmpGranularity) + + private def rangeMatch(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP, pmpGranularity: Int) = + lowerBoundMatch(prev, x, lgSize, lgMaxSize, pmpGranularity) && upperBoundMatch(pmp, x, lgMaxSize, pmpGranularity) + + private def pow2Homogeneous(pmp: PMP, x: UInt, pgLevel: UInt, paddrBits: Int, pmpGranularity: Int, pgLevels: Int, pgIdxBits: Int, pgLevelBits: Int): Bool = { + val maskHomogeneous = VecInit(pgLevelMap(pgLevels, pgIdxBits, pgLevelBits) { idxBits => if (idxBits > paddrBits) false.B else pmp.mask(idxBits - 1) })(pgLevel) + maskHomogeneous || VecInit(pgLevelMap(pgLevels, pgIdxBits, pgLevelBits) { idxBits => ((x ^ comparand(pmp, pmpGranularity)) >> idxBits) =/= 0.U })(pgLevel) + } + + private def pgLevelMap[T](pgLevels: Int, pgIdxBits: Int, pgLevelBits: 
Int)(f: Int => T): Seq[T] = (0 until pgLevels).map { i => + f(pgIdxBits + (pgLevels - 1 - i) * pgLevelBits) + } + + private def rangeHomogeneous(pmp: PMP, x: UInt, pgLevel: UInt, prev: PMP, paddrBits: Int, pmpGranularity: Int, pgLevels: Int, pgIdxBits: Int, pgLevelBits: Int) = { + val beginsAfterLower = !(x < comparand(prev, pmpGranularity)) + val beginsAfterUpper = !(x < comparand(pmp, pmpGranularity)) + + val pgMask = VecInit(pgLevelMap(pgLevels, pgIdxBits, pgLevelBits) { idxBits => (((BigInt(1) << paddrBits) - (BigInt(1) << idxBits)).max(0)).U })(pgLevel) + val endsBeforeLower = (x & pgMask) < (comparand(prev, pmpGranularity) & pgMask) + val endsBeforeUpper = (x & pgMask) < (comparand(pmp, pmpGranularity) & pgMask) + + endsBeforeLower || beginsAfterUpper || (beginsAfterLower && endsBeforeUpper) + } + + // returns whether this PMP completely contains, or contains none of, a page + def homogeneous(pmp: PMP, x: UInt, pgLevel: UInt, prev: PMP, paddrBits: Int, pmpGranularity: Int, pgLevels: Int, pgIdxBits: Int, pgLevelBits: Int): Bool = + Mux(napot(pmp), pow2Homogeneous(pmp, x, pgLevel, paddrBits, pmpGranularity, pgLevels, pgIdxBits, pgLevelBits), !torNotNAPOT(pmp) || rangeHomogeneous(pmp, x, pgLevel, prev, paddrBits, pmpGranularity, pgLevels, pgIdxBits, pgLevelBits)) + + // returns whether this matching PMP fully contains the access + def aligned(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP, pmpGranularity: Int): Bool = if (lgMaxSize <= log2Ceil(pmpGranularity)) true.B + else { + val lsbMask = UIntToOH1(lgSize, lgMaxSize) + val straddlesLowerBound: Bool = + ((x >> lgMaxSize) ^ (comparand(prev, pmpGranularity) >> lgMaxSize)) === 0.U && + (comparand(prev, pmpGranularity)(lgMaxSize - 1, 0) & ~x(lgMaxSize - 1, 0)) =/= 0.U + val straddlesUpperBound: Bool = + ((x >> lgMaxSize) ^ (comparand(pmp, pmpGranularity) >> lgMaxSize)) === 0.U && + (comparand(pmp, pmpGranularity)(lgMaxSize - 1, 0) & (x(lgMaxSize - 1, 0) | lsbMask)) =/= 0.U + val rangeAligned = !(straddlesLowerBound || straddlesUpperBound) + val pow2Aligned = (lsbMask & ~pmp.mask(lgMaxSize - 1, 0)) === 0.U + Mux(napot(pmp), pow2Aligned, rangeAligned) + } + + // returns whether this PMP matches at least one byte of the access + def hit(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP, pmpGranularity: Int): Bool = + Mux(napot(pmp), pow2Match(pmp, x, lgSize, lgMaxSize, pmpGranularity), torNotNAPOT(pmp) && rangeMatch(pmp, x, lgSize, lgMaxSize, prev, pmpGranularity)) + +} + +class PMP(paddrBits: Int) extends Bundle { + val mask = UInt(paddrBits.W) + val cfg = new PMPConfig + val addr = UInt((paddrBits - PMP.lgAlign).W) +} + +class PMPConfig extends Bundle { + val l = Bool() + val res = UInt(2.W) + val a = UInt(2.W) + val x = Bool() + val w = Bool() + val r = Bool() +} + +class PerfCounterIO(xLen: Int, retireWidth: Int) extends Bundle { + val eventSel = Output(UInt(xLen.W)) + val inc = Input(UInt(log2Ceil(1 + retireWidth).W)) +} + +class Envcfg extends Bundle { + val stce = Bool() // only for menvcfg/henvcfg + val pbmte = Bool() // only for menvcfg/henvcfg + val zero54 = UInt(54.W) + val cbze = Bool() + val cbcfe = Bool() + val cbie = UInt(2.W) + val zero3 = UInt(3.W) + val fiom = Bool() +} + +class DCSR extends Bundle { + val xdebugver = UInt(2.W) + val zero4 = UInt(2.W) + val zero3 = UInt(12.W) + val ebreakm = Bool() + val ebreakh = Bool() + val ebreaks = Bool() + val ebreaku = Bool() + val zero2 = Bool() + val stopcycle = Bool() + val stoptime = Bool() + val cause = UInt(3.W) + val v = Bool() + val zero1 = UInt(2.W) + 
val step = Bool() + val prv = UInt(PRV.SZ.W) +} + +class VCSR extends Bundle { + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + val vcsr: UInt = UInt(32.W) + val vstart: UInt = UInt(32.W) +} + + +class MIP(nLocalInterrupts: Int) extends Bundle { + val lip = Vec(nLocalInterrupts, Bool()) + val zero1 = Bool() + val debug = Bool() // keep in sync with CSR.debugIntCause + val sgeip = Bool() + val meip = Bool() + val vseip = Bool() + val seip = Bool() + val ueip = Bool() + val mtip = Bool() + val vstip = Bool() + val stip = Bool() + val utip = Bool() + val msip = Bool() + val vssip = Bool() + val ssip = Bool() + val usip = Bool() +} + +object PMPReg { + def napot(pmp: PMPReg) = pmp.cfg.a(1) +} + +class PMPReg(paddrBits: Int) extends Bundle { + val cfg = new PMPConfig + val addr = UInt((paddrBits - PMP.lgAlign).W) +} + +class MNStatus extends Bundle { + val mpp = UInt(2.W) + val zero3 = UInt(3.W) + val mpv = Bool() + val zero2 = UInt(3.W) + val mie = Bool() + val zero1 = UInt(3.W) +} + +class ExpandedInstruction extends Bundle { + val bits = UInt(32.W) + val rd = UInt(5.W) + val rs1 = UInt(5.W) + val rs2 = UInt(5.W) + val rs3 = UInt(5.W) +} + +class FrontendResp( + vaddrBits: Int, + entries: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int], + vaddrBitsExtended: Int, + coreInstBits: Int, + fetchWidth: Int) + extends Bundle { + val btb = new BTBResp(vaddrBits, entries, fetchWidth, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int]) + val pc = UInt(vaddrBitsExtended.W) // ID stage PC + val data = UInt((fetchWidth * coreInstBits).W) + val mask = UInt(fetchWidth.W) + val xcpt = new FrontendExceptions + val replay = Bool() +} + +class FrontendExceptions extends Bundle { + val pf = Bool() + val gf = Bool() + val ae = Bool() +} + +class Instruction extends Bundle { + val xcpt0 = new FrontendExceptions // exceptions on first half of instruction + val xcpt1 = new FrontendExceptions // exceptions on second half of instruction + val replay = Bool() + val rvc = Bool() + val inst = new ExpandedInstruction + val raw = UInt(32.W) +} + +class MultiplierReq(dataBits: Int, tagBits: Int, uopWidth: Int) extends Bundle { + val fn = Bits(uopWidth.W) + val dw = Bool() + val in1 = Bits(dataBits.W) + val in2 = Bits(dataBits.W) + val tag = UInt(tagBits.W) +} + +class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle { + val data = Bits(dataBits.W) + val full_data = Bits((2 * dataBits).W) + val tag = UInt(tagBits.W) +} + +class PMACheckerResponse extends Bundle { + val cacheable = Bool() + val r = Bool() + val w = Bool() + val pp = Bool() + val al = Bool() + val aa = Bool() + val x = Bool() + val eff = Bool() +} + + +/** IO between TLB and PTW + * + * PTW receives : + * - PTE request + * - CSRs info + * - pmp results from PMP(in TLB) + */ +class TLBPTWIO(nPMPs: Int, vpnBits: Int, paddrBits: Int, vaddrBits: Int, pgLevels: Int, xLen: Int, maxPAddrBits: Int, pgIdxBits: Int) extends Bundle { + val req = Decoupled(Valid(new PTWReq(vpnBits))) + val resp = Flipped(Valid(new PTWResp(vaddrBits, pgLevels))) + val ptbr = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val hgatp = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val vsatp = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val status = Input(new MStatus) + val hstatus = Input(new HStatus) + val gstatus = Input(new MStatus) + val pmp = Input(Vec(nPMPs, new PMP(paddrBits))) + // No customCSR for the first time refactor. 
+ // val customCSRs = Flipped(coreParams.customCSRs) +} + +class PTWReq(vpnBits: Int) extends Bundle { + val addr = UInt(vpnBits.W) + val need_gpa = Bool() + val vstage1 = Bool() + val stage2 = Bool() +} + +/** PTE info from L2TLB to TLB + * + * containing: target PTE, exceptions, two-stage translation info + */ +class PTWResp(vaddrBits: Int, pgLevels: Int) extends Bundle { + + /** ptw access exception */ + val ae_ptw = Bool() + + /** final access exception */ + val ae_final = Bool() + + /** page fault */ + val pf = Bool() + + /** guest page fault */ + val gf = Bool() + + /** hypervisor read */ + val hr = Bool() + + /** hypervisor write */ + val hw = Bool() + + /** hypervisor execute */ + val hx = Bool() + + /** PTE to refill L1TLB + * + * source: L2TLB + */ + val pte = new PTE + + /** pte pglevel */ + val level = UInt(log2Ceil(pgLevels).W) + + /** fragmented_superpage support */ + val fragmented_superpage = Bool() + + /** homogeneous for both pma and pmp */ + val homogeneous = Bool() + val gpa = Valid(UInt(vaddrBits.W)) + val gpa_is_pte = Bool() +} + +object PTE { + /** returns true if this PTE is a pointer to the next level of the page table */ + def table(pte: PTE) = pte.v && !pte.r && !pte.w && !pte.x && !pte.d && !pte.a && !pte.u && pte.reserved_for_future === 0.U + /** returns true if this is a leaf PTE */ + def leaf(pte: PTE) = pte.v && (pte.r || (pte.x && !pte.w)) && pte.a + /** user read */ + def ur(pte: PTE) = sr(pte) && pte.u + /** user write */ + def uw(pte: PTE) = sw(pte) && pte.u + /** user execute */ + def ux(pte: PTE) = sx(pte) && pte.u + /** supervisor read */ + def sr(pte: PTE) = leaf(pte) && pte.r + /** supervisor write */ + def sw(pte: PTE) = leaf(pte) && pte.w && pte.d + /** supervisor execute */ + def sx(pte: PTE) = leaf(pte) && pte.x + /** full permission: writable and executable in user mode */ + def isFullPerm(pte: PTE) = uw(pte) && ux(pte) +} + +/** PTE template for transmission + * + * contains useful methods to check PTE attributes + * @see RV-priv spec 4.3.1 for page table entry format + */ +class PTE extends Bundle { + val reserved_for_future = UInt(10.W) + val ppn = UInt(44.W) + val reserved_for_software = UInt(2.W) + + /** dirty bit */ + val d = Bool() + + /** access bit */ + val a = Bool() + + /** global mapping */ + val g = Bool() + + /** user mode accessible */ + val u = Bool() + + /** whether the page is executable */ + val x = Bool() + + /** whether the page is writable */ + val w = Bool() + + /** whether the page is readable */ + val r = Bool() + + /** valid bit */ + val v = Bool() +} + + +class HellaCacheIO( + coreMaxAddrBits: Int, + usingVM: Boolean, + untagBits: Int, + pgIdxBits: Int, + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + coreDataBytes: Int, + paddrBits: Int, + vaddrBitsExtended: Int, + separateUncachedResp: Boolean) + extends Bundle { + val req = Decoupled( + new HellaCacheReq(coreMaxAddrBits, usingVM, untagBits, pgIdxBits, dcacheReqTagBits, dcacheArbPorts, coreDataBytes) + ) + val s1_kill = Output(Bool()) // kill previous cycle's req + val s1_data = Output(new HellaCacheWriteData(coreDataBytes)) // data for previous cycle's req + val s2_nack = Input(Bool()) // req from two cycles ago is rejected + val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint) + val s2_kill = Output(Bool()) // kill req from two cycles ago + val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO + val s2_paddr = Input(UInt(paddrBits.W)) // translated address + + val resp = Flipped( + Valid( + new HellaCacheResp(
coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes + ) + ) + ) + val replay_next = Input(Bool()) + val s2_xcpt = Input(new HellaCacheExceptions) + val s2_gpa = Input(UInt(vaddrBitsExtended.W)) + val s2_gpa_is_pte = Input(Bool()) + val uncached_resp = Option.when(separateUncachedResp)( + Flipped( + Decoupled( + new HellaCacheResp( + coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes + ) + ) + ) + ) + val ordered = Input(Bool()) + val perf = Input(new HellaCachePerfEvents()) + + val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself? + val clock_enabled = Input(Bool()) // is D$ currently being clocked? +} + +class HellaCacheReq( + coreMaxAddrBits: Int, + usingVM: Boolean, + untagBits: Int, + pgIdxBits: Int, + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + coreDataBytes: Int) + extends Bundle { + require(isPow2(coreDataBytes)) + val coreDataBits: Int = coreDataBytes * 8 + val M_SZ = 5 + + val phys = Bool() + val no_alloc = Bool() + val no_xcpt = Bool() + + val addr = UInt(coreMaxAddrBits.W) + val idx = Option.when(usingVM && untagBits > pgIdxBits)(UInt(coreMaxAddrBits.W)) + val tag = UInt((dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W) + // TODO: handle this uop + val cmd = UInt(M_SZ.W) + val size = UInt(log2Ceil(log2Ceil(coreDataBytes) + 1).W) + val signed = Bool() + // TODO: handle this uop + val dprv = UInt(PRV.SZ.W) + val dv = Bool() + + val data = UInt(coreDataBits.W) + val mask = UInt(coreDataBytes.W) +} + + +class HellaCacheWriteData(coreDataBytes: Int) extends Bundle { + require(isPow2(coreDataBytes)) + val coreDataBits: Int = coreDataBytes * 8 + + val data = UInt(coreDataBits.W) + val mask = UInt(coreDataBytes.W) +} + + +class HellaCacheResp( + coreMaxAddrBits: Int, + usingVM: Boolean, + untagBits: Int, + pgIdxBits: Int, + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + coreDataBytes: Int) + extends Bundle { + require(isPow2(coreDataBytes)) + val coreDataBits: Int = coreDataBytes * 8 + val M_SZ = 5 + + val replay = Bool() + val has_data = Bool() + val data_word_bypass = UInt(coreDataBits.W) + val data_raw = UInt(coreDataBits.W) + val store_data = UInt(coreDataBits.W) + + val addr = UInt(coreMaxAddrBits.W) + val idx = Option.when(usingVM && untagBits > pgIdxBits)(UInt(coreMaxAddrBits.W)) + val tag = UInt((dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W) + val cmd = UInt(M_SZ.W) + val size = UInt(log2Ceil(log2Ceil(coreDataBytes) + 1).W) + val signed = Bool() + val dprv = UInt(PRV.SZ.W) + val dv = Bool() + + val data = UInt(coreDataBits.W) + val mask = UInt(coreDataBytes.W) +} + +class HellaCacheExceptions extends Bundle { + val ma = new AlignmentExceptions + val pf = new AlignmentExceptions + val gf = new AlignmentExceptions + val ae = new AlignmentExceptions +} + +class AlignmentExceptions extends Bundle { + val ld = Bool() + val st = Bool() +} + +class HellaCachePerfEvents extends Bundle { + val acquire = Bool() + val release = Bool() + val grant = Bool() + val tlbMiss = Bool() + val blocked = Bool() + val canAcceptStoreThenLoad = Bool() + val canAcceptStoreThenRMW = Bool() + val canAcceptLoadThenLoad = Bool() + val storeBufferEmptyAfterLoad = Bool() + val storeBufferEmptyAfterStore = Bool() +} + +class DatapathPTWIO( + xLen: Int, + maxPAddrBits: Int, + pgIdxBits: Int, + vaddrBits: Int, + asidBits: Int, + nPMPs: Int, + paddrBits: Int) + extends Bundle { + val ptbr = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val hgatp = Input(new PTBR(xLen, 
maxPAddrBits, pgIdxBits)) + val vsatp = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val sfence = Flipped(Valid(new SFenceReq(vaddrBits, asidBits))) + val status = Input(new MStatus()) + val hstatus = Input(new HStatus()) + val gstatus = Input(new MStatus()) + val pmp = Input(Vec(nPMPs, new PMP(paddrBits))) + val perf = Output(new PTWPerfEvents()) + // No customCSR for the first time refactor. + // val customCSRs = Flipped(coreParams.customCSRs) + + /** enable clock generated by ptw */ + val clock_enabled = Output(Bool()) +} + +class SFenceReq(vaddrBits: Int, asidBits: Int) extends Bundle { + val rs1 = Bool() + val rs2 = Bool() + val addr = UInt(vaddrBits.W) + val asid = UInt(asidBits.W) + val hv = Bool() + val hg = Bool() +} + +class PTWPerfEvents extends Bundle { + val l2miss = Bool() + val l2hit = Bool() + val pte_miss = Bool() + val pte_hit = Bool() +} + +/** L2TLB PTE template + * + * contains tag bits + * @param nSets number of sets in L2TLB + * @see RV-priv spec 4.3.1 for page table entry format + */ +class L2TLBEntry(nSets: Int, ppnBits: Int, maxSVAddrBits: Int, pgIdxBits: Int, usingHypervisor: Boolean) extends Bundle { + val idxBits = log2Ceil(nSets) + val tagBits = maxSVAddrBits - pgIdxBits - idxBits + (if (usingHypervisor) 1 else 0) + val tag = UInt(tagBits.W) + val ppn = UInt(ppnBits.W) + + /** dirty bit */ + val d = Bool() + + /** access bit */ + val a = Bool() + + /** user mode accessible */ + val u = Bool() + + /** whether the page is executable */ + val x = Bool() + + /** whether the page is writable */ + val w = Bool() + + /** whether the page is readable */ + val r = Bool() +} + +class ICacheReq(vaddrBits: Int) extends Bundle { + val addr = UInt(vaddrBits.W) +} + +class ICacheResp(fetchBytes: Int) extends Bundle { + + /** data to CPU. + * @todo why 4 instructions? + */ + val data = UInt((fetchBytes * 8).W) + + /** asks the CPU to replay the fetch when a tag or data ECC error happens. */ + val replay = Bool() + + /** access exception: + * indicates to the CPU that a tag ECC error happened. + * if [[outer.icacheParams.latency]] is 1, tie 0.
+ */ + val ae = Bool() + +} + +class ICacheErrors(hasCorrectable: Boolean, hasUncorrectable: Boolean, paddrBits: Int) extends Bundle { + val correctable = Option.when(hasCorrectable)(Valid(UInt(paddrBits.W))) + val uncorrectable = Option.when(hasUncorrectable)(Valid(UInt(paddrBits.W))) + val bus = Valid(UInt(paddrBits.W)) +} + +class ICachePerfEvents extends Bundle { + val acquire = Bool() +} + +class FPInput(fLen: Int) extends Bundle { + val fpuControl = new FPUCtrlSigs + val rm = UInt(FPConstants.RM_SZ.W) + val fmaCmd = UInt(2.W) + val typ = UInt(2.W) + val fmt = UInt(2.W) + val in1 = UInt((fLen+1).W) + val in2 = UInt((fLen+1).W) + val in3 = UInt((fLen+1).W) +} + +// @todo DecodeBundle +class FPUCtrlSigs extends Bundle { + val ldst = Bool() + val wen = Bool() + val ren1 = Bool() + val ren2 = Bool() + val ren3 = Bool() + val swap12 = Bool() + val swap23 = Bool() + val typeTagIn = UInt(2.W) + val typeTagOut = UInt(2.W) + val fromint = Bool() + val toint = Bool() + val fastpipe = Bool() + val fma = Bool() + val div = Bool() + val sqrt = Bool() + val wflags = Bool() +} + +class FPResult(fLen: Int) extends Bundle { + val data = UInt((fLen+1).W) + val exc = UInt(FPConstants.FLAGS_SZ.W) +} + +class FPToIntOutput(fLen: Int, xLen: Int) extends Bundle { + val in = new FPInput(fLen) + val lt = Bool() + val store = UInt(fLen.W) + val toint = UInt(xLen.W) + val exc = UInt(FPConstants.FLAGS_SZ.W) +} + +class IntToFPInput(xLen: Int) extends Bundle { + val fpuControl = new FPUCtrlSigs + val rm = UInt(FPConstants.RM_SZ.W) + val typ = UInt(2.W) + val in1 = UInt(xLen.W) +} + +class FPUCoreIO(hartIdLen: Int, xLen: Int, fLen: Int) extends Bundle { + val hartid = Input(UInt(hartIdLen.W)) + val time = Input(UInt(xLen.W)) + + val inst = Input(UInt(32.W)) + val fromint_data = Input(UInt(xLen.W)) + + val fcsr_rm = Input(UInt(FPConstants.RM_SZ.W)) + val fcsr_flags = Valid(UInt(FPConstants.FLAGS_SZ.W)) + + val store_data = Output(UInt(fLen.W)) + val toint_data = Output(UInt(xLen.W)) + + val dmem_resp_val = Input(Bool()) + val dmem_resp_type = Input(UInt(3.W)) + val dmem_resp_tag = Input(UInt(5.W)) + val dmem_resp_data = Input(UInt(fLen.W)) + + val valid = Input(Bool()) + val fcsr_rdy = Output(Bool()) + val nack_mem = Output(Bool()) + val illegal_rm = Output(Bool()) + val killx = Input(Bool()) + val killm = Input(Bool()) + val dec = Output(new FPUCtrlSigs()) + val sboard_set = Output(Bool()) + val sboard_clr = Output(Bool()) + val sboard_clra = Output(UInt(5.W)) + + val keep_clock_enabled = Input(Bool()) +} + +class TLBReq(lgMaxSize: Int, vaddrBitsExtended: Int)() extends Bundle { + // TODO: remove it. + val M_SZ = 5 + + /** request address from CPU. */ + val vaddr = UInt(vaddrBitsExtended.W) + + /** don't lookup TLB, bypass vaddr as paddr */ + val passthrough = Bool() + + /** granularity */ + val size = UInt(log2Ceil(lgMaxSize + 1).W) + + /** memory command. 
*/ + val cmd = UInt(M_SZ.W) + val prv = UInt(PRV.SZ.W) + + /** virtualization mode */ + val v = Bool() + +} + +class TLBResp(paddrBits: Int, vaddrBitsExtended: Int) extends Bundle { + // lookup responses + val miss = Bool() + + /** physical address */ + val paddr = UInt(paddrBits.W) + val gpa = UInt(vaddrBitsExtended.W) + val gpa_is_pte = Bool() + + /** page fault exception */ + val pf = new TLBExceptions + + /** guest page fault exception */ + val gf = new TLBExceptions + + /** access exception */ + val ae = new TLBExceptions + + /** misaligned access exception */ + val ma = new TLBExceptions + + /** if this address is cacheable */ + val cacheable = Bool() + + /** if caches must allocate this address */ + val must_alloc = Bool() + + /** if this address is prefetchable for caches */ + val prefetchable = Bool() +} + +class TLBExceptions extends Bundle { + val ld = Bool() + val st = Bool() + val inst = Bool() +} + +object TLBEntry { + + /** returns all entry data in this entry */ + def entry_data(tlbEntry: TLBEntry) = tlbEntry.data.map(_.asTypeOf(new TLBEntryData(tlbEntry.ppnBits))) + + /** returns the index of sector */ + private def sectorIdx(tlbEntry: TLBEntry, vpn: UInt) = vpn(log2Ceil(tlbEntry.nSectors) - 1, 0) + + /** returns the entry data matched with this vpn */ + def getData(tlbEntry: TLBEntry, vpn: UInt) = tlbEntry.data(sectorIdx(tlbEntry, vpn)).asTypeOf(new TLBEntryData(tlbEntry.ppnBits)) + + /** returns whether a sector hits */ + def sectorHit(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool) = tlbEntry.valid.asUInt.orR && sectorTagMatch(tlbEntry, vpn, virtual) + + /** returns whether tag matches vpn */ + def sectorTagMatch(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool) = (((tlbEntry.tag_vpn ^ vpn) >> log2Ceil(tlbEntry.nSectors)) === 0.U) && (tlbEntry.tag_v === virtual) + + /** returns hit signal */ + def hit(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool, usingVM: Boolean, pgLevelBits: Int, hypervisorExtraAddrBits: Int, superpage: Boolean, superpageOnly: Boolean): Bool = { + if (superpage && usingVM) { + var tagMatch = tlbEntry.valid.head && (tlbEntry.tag_v === virtual) + for (j <- 0 until tlbEntry.pgLevels) { + val base = (tlbEntry.pgLevels - 1 - j) * pgLevelBits + val n = pgLevelBits + (if (j == 0) hypervisorExtraAddrBits else 0) + val ignore = tlbEntry.level < j.U || (superpageOnly && (j == (tlbEntry.pgLevels - 1))).B + tagMatch = tagMatch && (ignore || (tlbEntry.tag_vpn ^ vpn)(base + n - 1, base) === 0.U) + } + tagMatch + } else { + val idx = sectorIdx(tlbEntry, vpn) + tlbEntry.valid(idx) && sectorTagMatch(tlbEntry, vpn, virtual) + } + } + + /** returns the ppn of the input TLBEntryData */ + def ppn(tlbEntry: TLBEntry, vpn: UInt, data: TLBEntryData, usingVM: Boolean, pgLevelBits: Int, superpage: Boolean, superpageOnly: Boolean) = { + val supervisorVPNBits = tlbEntry.pgLevels * pgLevelBits + if (superpage && usingVM) { + var res = data.ppn >> pgLevelBits * (tlbEntry.pgLevels - 1) + for (j <- 1 until tlbEntry.pgLevels) { + val ignore = tlbEntry.level < j.U || (superpageOnly && j == tlbEntry.pgLevels - 1).B + res = Cat( + res, + (Mux(ignore, vpn, 0.U) | data.ppn)( + supervisorVPNBits - j * pgLevelBits - 1, + supervisorVPNBits - (j + 1) * pgLevelBits + ) + ) + } + res + } else { + data.ppn + } + } + + /** does the refill + * + * find the target entry with vpn tag + * and replace the target entry with the input entry data + */ + def insert(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData, superpageOnly: Boolean): Unit = { + tlbEntry.tag_vpn := vpn + 
tlbEntry.tag_v := virtual + tlbEntry.level := level(log2Ceil(tlbEntry.pgLevels - (if (superpageOnly) 1 else 0)) - 1, 0) + + val idx = sectorIdx(tlbEntry, vpn) + tlbEntry.valid(idx) := true.B + tlbEntry.data(idx) := entry.asUInt + } + + def invalidate(tlbEntry: TLBEntry): Unit = { tlbEntry.valid.foreach(_ := false.B) } + def invalidate(tlbEntry: TLBEntry, virtual: Bool): Unit = { + for ((v, e) <- tlbEntry.valid.zip(entry_data(tlbEntry))) + when(tlbEntry.tag_v === virtual) { v := false.B } + } + def invalidateVPN(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool, usingVM: Boolean, pgLevelBits: Int, hypervisorExtraAddrBits: Int, superpage: Boolean, superpageOnly: Boolean): Unit = { + if (superpage) { + when(hit(tlbEntry, vpn, virtual, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage, superpageOnly)) { invalidate(tlbEntry) } + } else { + when(sectorTagMatch(tlbEntry, vpn, virtual)) { + for (((v, e), i) <- (tlbEntry.valid.zip(entry_data(tlbEntry))).zipWithIndex) + when(tlbEntry.tag_v === virtual && i.U === sectorIdx(tlbEntry, vpn)) { v := false.B } + } + } + // For fragmented superpage mappings, we assume the worst (largest) + // case, and zap entries whose most-significant VPNs match + when(((tlbEntry.tag_vpn ^ vpn) >> (pgLevelBits * (tlbEntry.pgLevels - 1))) === 0.U) { + for ((v, e) <- tlbEntry.valid.zip(entry_data(tlbEntry))) + when(tlbEntry.tag_v === virtual && e.fragmented_superpage) { v := false.B } + } + } + def invalidateNonGlobal(tlbEntry: TLBEntry, virtual: Bool): Unit = { + for ((v, e) <- tlbEntry.valid.zip(entry_data(tlbEntry))) + when(tlbEntry.tag_v === virtual && !e.g) { v := false.B } + } +} + +class TLBEntry(val nSectors: Int, val pgLevels: Int, vpnBits: Int, val ppnBits: Int) extends Bundle { + + val level = UInt(log2Ceil(pgLevels).W) + + /** use vpn as tag */ + val tag_vpn = UInt(vpnBits.W) + + /** tag in virtualization mode */ + val tag_v = Bool() + + /** entry data */ + val data = Vec(nSectors, UInt(new TLBEntryData(ppnBits).getWidth.W)) + + /** valid bit */ + val valid = Vec(nSectors, Bool()) +} + +class TLBEntryData(ppnBits: Int) extends Bundle { + val ppn = UInt(ppnBits.W) + + /** pte.u user */ + val u = Bool() + + /** pte.g global */ + val g = Bool() + + /** access exception. + * D$ -> PTW -> TLB AE + * Alignment failed.
+ */ + val ae_ptw = Bool() + val ae_final = Bool() + val ae_stage2 = Bool() + + /** page fault */ + val pf = Bool() + + /** guest page fault */ + val gf = Bool() + + /** supervisor write */ + val sw = Bool() + + /** supervisor execute */ + val sx = Bool() + + /** supervisor read */ + val sr = Bool() + + /** hypervisor write */ + val hw = Bool() + + /** hypervisor execute */ + val hx = Bool() + + /** hypervisor read */ + val hr = Bool() + + /** prot_w */ + val pw = Bool() + + /** prot_x */ + val px = Bool() + + /** prot_r */ + val pr = Bool() + + /** PutPartial */ + val ppp = Bool() + + /** AMO logical */ + val pal = Bool() + + /** AMO arithmetic */ + val paa = Bool() + + /** get/put effects */ + val eff = Bool() + + /** cacheable */ + val c = Bool() + + /** fragmented_superpage support */ + val fragmented_superpage = Bool() +} + +class DCacheErrors(hasCorrectable: Boolean, hasUncorrectable: Boolean, paddrBits: Int) extends Bundle { + val correctable: Option[Valid[UInt]] = Option.when(hasCorrectable)(Valid(UInt(paddrBits.W))) + val uncorrectable: Option[Valid[UInt]] = Option.when(hasUncorrectable)(Valid(UInt(paddrBits.W))) + val bus: Valid[UInt] = Valid(UInt(paddrBits.W)) +} + +class DCacheTLBPort(paddrBits: Int, vaddrBitsExtended: Int) extends Bundle { + val req: DecoupledIO[TLBReq] = Flipped(Decoupled(new TLBReq(paddrBits, vaddrBitsExtended))) + val s1_resp: TLBResp = Output(new TLBResp(paddrBits, vaddrBitsExtended)) + val s2_kill: Bool = Input(Bool()) +} + +object ClientStates { + val width = 2 + + def Nothing = 0.U(width.W) + def Branch = 1.U(width.W) + def Trunk = 2.U(width.W) + def Dirty = 3.U(width.W) + + def hasReadPermission(state: UInt): Bool = state > Nothing + def hasWritePermission(state: UInt): Bool = state > Branch +} + +class ClientMetadata extends Bundle { + /** Actual state information stored in this bundle */ + val state = UInt(ClientStates.width.W) +} + +class L1Metadata(tagBits: Int) extends Bundle { + val coh = new ClientMetadata + val tag = UInt(tagBits.W) +} + +class DCacheMetadataReq(vaddrBitsExtended: Int, idxBits: Int, nWays: Int, dataWidth: Int) extends Bundle { + val write: Bool = Bool() + val addr: UInt = UInt(vaddrBitsExtended.W) + val idx: UInt = UInt(idxBits.W) + val way_en: UInt = UInt(nWays.W) + val data: UInt = UInt(dataWidth.W) +} + +class DCacheDataReq(untagBits: Int, encBits: Int, rowBytes: Int, eccBytes: Int, subWordBytes: Int, wordBytes: Int, nWays: Int) extends Bundle { + val addr: UInt = UInt(untagBits.W) + val write: Bool = Bool() + val wdata: UInt = UInt((encBits * rowBytes / eccBytes).W) + val wordMask: UInt = UInt((rowBytes / subWordBytes).W) + val eccMask: UInt = UInt((wordBytes / eccBytes).W) + val way_en: UInt = UInt(nWays.W) +} + +class FrontendReq(vaddrBitsExtended: Int) extends Bundle { + val pc = UInt(vaddrBitsExtended.W) + val speculative = Bool() +} + +class FrontendPerfEvents extends Bundle { + val acquire = Bool() + val tlbMiss = Bool() +} + +class FrontendIO(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entries: Int, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int], coreInstBits: Int, fetchWidth: Int) extends Bundle { + val might_request = Output(Bool()) + val clock_enabled = Input(Bool()) + val req = Valid(new FrontendReq(vaddrBitsExtended)) + val sfence = Valid(new SFenceReq(vaddrBits, asidBits)) + val resp = Flipped(Decoupled(new FrontendResp(vaddrBits, entries, bhtHistoryLength, bhtCounterLength, vaddrBitsExtended, coreInstBits, fetchWidth))) + val gpa = Flipped(Valid(UInt(vaddrBitsExtended.W))) + val
btb_update = Valid(new BTBUpdate(vaddrBits, entries, fetchWidth, bhtHistoryLength, bhtCounterLength)) + val bht_update = Valid(new BHTUpdate(bhtHistoryLength, bhtCounterLength, vaddrBits)) + val ras_update = Valid(new RASUpdate(vaddrBits)) + val flush_icache = Output(Bool()) + val npc = Input(UInt(vaddrBitsExtended.W)) + val perf = Input(new FrontendPerfEvents) + val progress = Output(Bool()) +} + +// Non-diplomatic version of Frontend +class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entries: Int, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int], coreInstBits: Int, nPMPs: Int, vpnBits: Int, paddrBits: Int, pgLevels: Int, xLen: Int, maxPAddrBits: Int, pgIdxBits: Int, hasCorrectable: Boolean, hasUncorrectable: Boolean, fetchWidth: Int) extends Bundle { + val cpu = Flipped(new FrontendIO(vaddrBitsExtended, vaddrBits, asidBits, entries, bhtHistoryLength, bhtCounterLength, coreInstBits, fetchWidth)) + val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits) + val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits) +} + +// Interface between T1 <> Rocket integration +class RocketCoreToT1(xLen: Int, vlWidth: Int) extends Bundle { + val issue: DecoupledIO[T1Issue] = Decoupled(new T1Issue(xLen, vlWidth)) + val retire: T1Retire = Flipped(new T1Retire(xLen)) +} + +class T1Issue(xLen: Int, vlWidth: Int) extends Bundle { + val instruction: UInt = UInt(32.W) + val rs1Data: UInt = UInt(xLen.W) + val rs2Data: UInt = UInt(xLen.W) + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + val vstart: UInt = UInt(32.W) + val vcsr: UInt = UInt(32.W) +} + +object T1Issue { + def vlmul(issue: T1Issue): UInt = issue.vtype(2, 0) + def vsew(issue: T1Issue): UInt = issue.vtype(5, 3) + def vta(issue: T1Issue): Bool = issue.vtype(6) + def vma(issue: T1Issue): Bool = issue.vtype(7) + def vxrm(issue: T1Issue): UInt = issue.vcsr(2, 1) +} + +class T1RdRetire(xLen: Int) extends Bundle { + val rdAddress: UInt = UInt(5.W) + val rdData: UInt = UInt(xLen.W) + val isFp: Bool = Bool() +} + +class T1CSRRetire extends Bundle { + val vxsat: UInt = UInt(32.W) + val fflag: UInt = UInt(32.W) +} + +class T1Retire(xLen: Int) extends Bundle { + val rd: Valid[T1RdRetire] = Valid(new T1RdRetire(xLen)) + val csr: Valid[T1CSRRetire] = Valid(new T1CSRRetire) + val mem: Valid[Bundle] = Valid(new Bundle {}) +} \ No newline at end of file diff --git a/rocketv/src/CSR.scala b/rocketv/src/CSR.scala new file mode 100644 index 000000000..aa72c4fda --- /dev/null +++ b/rocketv/src/CSR.scala @@ -0,0 +1,1842 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +// @todo: remove me +import org.chipsalliance.rocketv.rvdecoderdbcompat._ + +import scala.collection.mutable + +class EventSet(val gate: (UInt, UInt) => Bool, val events: Seq[(String, () => Bool)]) { + def size = events.size + val hits = WireDefault(VecInit(Seq.fill(size)(false.B))) + def check(mask: UInt) = { + hits := events.map(_._2()) + gate(mask, hits.asUInt) + } + def dump(): Unit = { + for (((name, _), i) <- events.zipWithIndex) + when(check(1.U << i)) { printf(s"Event $name\n") } + } +} + +class EventSets(val 
eventSets: Seq[EventSet]) { + def maskEventSelector(eventSel: UInt): UInt = { + // allow full associativity between counters and event sets (for now?) + val setMask = (BigInt(1) << eventSetIdBits) - 1 + val maskMask = ((BigInt(1) << eventSets.map(_.size).max) - 1) << maxEventSetIdBits + eventSel & (setMask | maskMask).U + } + + private def decode(counter: UInt): (UInt, UInt) = { + require(eventSets.size <= (1 << maxEventSetIdBits)) + require(eventSetIdBits > 0) + (counter(eventSetIdBits - 1, 0), counter >> maxEventSetIdBits) + } + + def evaluate(eventSel: UInt): Bool = { + val (set, mask) = decode(eventSel) + val sets = for (e <- eventSets) yield { + require(e.hits.getWidth <= mask.getWidth, s"too many events: hits width ${e.hits.getWidth} exceeds mask width ${mask.getWidth}") + e.check(mask) + } + VecInit(sets).asUInt(set) + } + +// def cover() = eventSets.foreach { _.withCovers } + + private def eventSetIdBits = log2Up(eventSets.size) + private def maxEventSetIdBits = 8 + + require(eventSetIdBits <= maxEventSetIdBits) +} + +case class CustomCSR(id: Int, mask: BigInt, init: Option[BigInt]) + +object CustomCSR { + def constant(id: Int, value: BigInt): CustomCSR = CustomCSR(id, BigInt(0), Some(value)) +} + +object CSRParameter { + implicit def rwP: upickle.default.ReadWriter[CSRParameter] = upickle.default.macroRW[CSRParameter] +} + +case class CSRParameter( + useAsyncReset: Boolean, + vLen: Int, + xLen: Int, + fLen: Int, + hartIdLen: Int, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + vmidBits: Int, + nPMPs: Int, + nPerfCounters: Int, + paddrBits: Int, + nBreakpoints: Int, + usingSupervisor: Boolean, + usingFPU: Boolean, + usingUser: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingAtomics: Boolean, + usingDebug: Boolean, + usingMulDiv: Boolean, + usingVector: Boolean) + extends SerializableModuleParameter { + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + // compatibility mode + // TODO: the parameters below are not yet configurable + def decodeWidth = 1 + def nLocalInterrupts: Int = 0 + def minPgLevels: Int = { + val res = xLen match { + case 32 => 2 + case 64 => 3 + } + require(pgLevels >= res) + res + } + def maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } + def customCSRSize: Int = 0 + def haveBasicCounters: Boolean = true + def resetVectorLen: Int = { + val externalLen = paddrBits + require(externalLen <= xLen, s"External reset vector length ($externalLen) must be <= XLEN ($xLen)") + require( + externalLen <= vaddrBitsExtended, + s"External reset vector length ($externalLen) must be <= virtual address bit width ($vaddrBitsExtended)" + ) + externalLen + } + def iLen: Int = 32 + private def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + (if (usingHypervisor) 1 else 0) else 0) + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + def hasBeu = false + def usingHypervisor = false + def usingNMI = false + def haveCFlush = false + def retireWidth: Int = 1 + private def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def haveFSDirty: Boolean = false + def useBPWatch: Boolean = false + def maxHypervisorExtraAddrBits: Int = 2 + def hypervisorExtraAddrBits: Int = { + if (usingHypervisor) maxHypervisorExtraAddrBits + else 0 + } + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend 
but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1).min(xLen) + } + def vpnBits: Int = vaddrBits - pgIdxBits + def ppnBits: Int = paddrBits - pgIdxBits + def pmpGranularity: Int = if (usingHypervisor) 4096 else 4 + def mtvecInit: Option[BigInt] = Some(0) + def misaWritable: Boolean = false + def mtvecWritable: Boolean = true + def customIsaExt: Option[String] = None + def useRVE: Boolean = false + def debugEntry: Option[BigInt] = Some(0 + 0x800) + def debugException: Option[BigInt] = Some(0 + 0x808) + // TODO: use layer for DV + def enableCommitLog: Boolean = false + + // original CSR object + // commands + val SZ = 3 + def X = BitPat.dontCare(SZ) + def N = 0.U(SZ.W) + def R = 2.U(SZ.W) + def I = 4.U(SZ.W) + def W = 5.U(SZ.W) + def S = 6.U(SZ.W) + def C = 7.U(SZ.W) + + // mask a CSR cmd with a valid bit + def maskCmd(valid: Bool, cmd: UInt): UInt = { + // all commands less than CSR.I are treated by CSRFile as NOPs + cmd & ~Mux(valid, 0.U, I) + } + + val ADDRSZ = 12 + def pgIdxBits: Int = 12 + + def modeLSB: Int = 8 + def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) + def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB) + + def busErrorIntCause = 128 + def debugIntCause = 14 // keep in sync with MIP.debug + def debugTriggerCause = { + val res = debugIntCause + require(!(Causes.all contains res)) + res + } + def rnmiIntCause = 13 // NMI: Higher numbers = higher priority, must not reuse debugIntCause + def rnmiBEUCause = 12 + + val firstCtr = CSRs.cycle + val firstCtrH = CSRs.cycleh + val firstHPC = CSRs.hpmcounter3 + val firstHPCH = CSRs.hpmcounter3h + val firstHPE = CSRs.mhpmevent3 + val firstMHPC = CSRs.mhpmcounter3 + val firstMHPCH = CSRs.mhpmcounter3h + val firstHPM = 3 + val nCtr = 32 + val nHPM = nCtr - firstHPM + val hpmWidth = 40 + val maxPMPs = 16 +} + +class CSRInterface(parameter: CSRParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val ungatedClock = Input(Clock()) + val interrupts = Input( + new CoreInterrupts( + parameter.usingSupervisor, + parameter.nLocalInterrupts, + parameter.hasBeu, + parameter.usingNMI, + parameter.resetVectorLen + ) + ) + val hartid = Input(UInt(parameter.hartIdLen.W)) + val rw = new Bundle { + val addr = Input(UInt(parameter.ADDRSZ.W)) + val cmd = Input(Bits(parameter.SZ.W)) + val rdata = Output(Bits(parameter.xLen.W)) + val wdata = Input(Bits(parameter.xLen.W)) + } + val decode = Vec(parameter.decodeWidth, new CSRDecodeIO(parameter.iLen)) + val csrStall = Output(Bool()) // stall retire for wfi + val rwStall = Output(Bool()) // stall CSR reads/writes; an access has no effect while rwStall is asserted + val eret = Output(Bool()) + val singleStep = Output(Bool()) + val status = Output(new MStatus) + val hstatus = Output(new HStatus) + val gstatus = Output(new MStatus) + val ptbr = Output(new PTBR(parameter.xLen, parameter.maxPAddrBits, parameter.pgIdxBits)) + val hgatp = Output(new PTBR(parameter.xLen, parameter.maxPAddrBits, parameter.pgIdxBits)) + val vsatp = Output(new PTBR(parameter.xLen, parameter.maxPAddrBits, parameter.pgIdxBits)) + val evec = Output(UInt(parameter.vaddrBitsExtended.W)) + val exception = Input(Bool()) + val retire = Input(UInt(log2Up(1 + parameter.retireWidth).W)) + val cause = Input(UInt(parameter.xLen.W)) + val pc = Input(UInt(parameter.vaddrBitsExtended.W)) + val tval = Input(UInt(parameter.vaddrBitsExtended.W)) + val htval = Input(UInt(((parameter.maxSVAddrBits + 
1).min(parameter.xLen)).W)) + val gva = Input(Bool()) + val time = Output(UInt(parameter.xLen.W)) + val fcsrRm = Output(Bits(FPConstants.RM_SZ.W)) + val fcsrFlags = Flipped(Valid(Bits(FPConstants.FLAGS_SZ.W))) + val setFsDirty = Option.when(parameter.haveFSDirty)(Input(Bool())) + val interrupt = Output(Bool()) + val interruptCause = Output(UInt(parameter.xLen.W)) + val bp = Output( + Vec( + parameter.nBreakpoints, + new BP( + parameter.xLen, + parameter.useBPWatch, + parameter.vaddrBits, + parameter.mcontextWidth, + parameter.scontextWidth + ) + ) + ) + val pmp = Output(Vec(parameter.nPMPs, new PMP(parameter.paddrBits))) + val counters = Vec(parameter.nPerfCounters, new PerfCounterIO(parameter.xLen, parameter.retireWidth)) + val csrwCounter = Output(UInt(parameter.nCtr.W)) + val inhibitCycle = Output(Bool()) + val inst = Input(Vec(parameter.retireWidth, UInt(parameter.iLen.W))) + val mcontext = Output(UInt(parameter.mcontextWidth.W)) + val scontext = Output(UInt(parameter.scontextWidth.W)) + val fiom = Output(Bool()) + val vectorCsr = Option.when(parameter.usingVector)(Input(Bool())) + val wbRegRS2 = Option.when(parameter.usingVector)(Input(UInt(parameter.xLen.W))) + val csrToVector = Option.when(parameter.usingVector)(Output(new VCSR)) + // @todo custom CSR + val customCSRs = Vec(parameter.customCSRSize, new CustomCSRIO(parameter.xLen)) +} + +@instantiable +class CSR(val parameter: CSRParameter) + extends FixedIORawModule(new CSRInterface(parameter)) + with SerializableModule[CSRParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val perfEventSets: EventSets = new EventSets(Nil) + val customCSRs: Seq[CustomCSR] = Nil + // compatibility mode + // TODO: remove me. 
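+ // Editorial sketch (not part of the original patch): the BitPat strings below are literal 32-bit RISC-V SYSTEM encodings in which '?' marks a don't-care bit, so e.g. SFENCE_VMA matches any rs1/rs2 field. A hypothetical helper showing how one such pattern is checked against an instruction word: + def matchesPattern(inst: UInt, pat: BitPat): Bool = pat === inst + // e.g. matchesPattern(io.decode(0).inst, ECALL) is high only for the ECALL encoding.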
+ def HLV_B = BitPat("b011000000000?????100?????1110011") + def HLV_BU = BitPat("b011000000001?????100?????1110011") + def HLV_D = BitPat("b011011000000?????100?????1110011") + def HLV_H = BitPat("b011001000000?????100?????1110011") + def HLV_HU = BitPat("b011001000001?????100?????1110011") + def HLV_W = BitPat("b011010000000?????100?????1110011") + def HLV_WU = BitPat("b011010000001?????100?????1110011") + def HLVX_HU = BitPat("b011001000011?????100?????1110011") + def HLVX_WU = BitPat("b011010000011?????100?????1110011") + def HSV_B = BitPat("b0110001??????????100000001110011") + def HSV_D = BitPat("b0110111??????????100000001110011") + def HSV_H = BitPat("b0110011??????????100000001110011") + def HSV_W = BitPat("b0110101??????????100000001110011") + def EBREAK = BitPat("b00000000000100000000000001110011") + def ECALL = BitPat("b00000000000000000000000001110011") + def MRET = BitPat("b00110000001000000000000001110011") + def WFI = BitPat("b00010000010100000000000001110011") + def DRET = BitPat("b01111011001000000000000001110011") + def SRET = BitPat("b00010000001000000000000001110011") + def SFENCE_VMA = BitPat("b0001001??????????000000001110011") + def HFENCE_VVMA = BitPat("b0010001??????????000000001110011") + def HFENCE_GVMA = BitPat("b0110001??????????000000001110011") + + // custom + def CEASE = BitPat("b00110000010100000000000001110011") + def MNRET = BitPat("b01110000001000000000000001110011") + def CFLUSH_D_L1 = BitPat("b111111000000?????000000001110011") + def Y = BitPat.Y() + def N = BitPat.N() + def X = BitPat.dontCare(1) + + val hasBeu: Boolean = parameter.hasBeu + val usingVector = parameter.usingVector + val customIsaExt = parameter.customIsaExt + val usingCompressed = parameter.usingCompressed + val vLen = parameter.vLen + val xLen = parameter.xLen + val fLen = parameter.fLen + val ppnBits = parameter.ppnBits + val asIdBits = parameter.asidBits + val vmIdBits = parameter.vmidBits + val nPMPs = parameter.nPMPs + val vpnBits = parameter.vpnBits + val useBPWatch = parameter.useBPWatch + val vaddrBits = parameter.vaddrBits + val paddrBits = parameter.paddrBits + val pmpGranularity = parameter.pmpGranularity + val usingHypervisor = parameter.usingHypervisor + val usingSupervisor = parameter.usingSupervisor + val usingVM = parameter.usingVM + val nLocalInterrupts: Int = parameter.nLocalInterrupts + val usingNMI = parameter.usingNMI + val usingFPU = parameter.usingFPU + val usingMulDiv = parameter.usingMulDiv + val usingAtomics = parameter.usingAtomics + val usingUser = parameter.usingUser + val vaddrBitsExtended = parameter.vaddrBitsExtended + val maxSVAddrBits = parameter.maxSVAddrBits + val nBreakpoints = parameter.nBreakpoints + val mtvecInit = parameter.mtvecInit + val nPerfCounters = parameter.nPerfCounters + val maxPAddrBits = parameter.maxPAddrBits + val pgIdxBits = parameter.pgIdxBits + val enableCommitLog = parameter.enableCommitLog + val usingDebug = parameter.usingDebug + val minPgLevels = parameter.minPgLevels + val pgLevels = parameter.pgLevels + val mtvecWritable = parameter.mtvecWritable + def pgLevelsToMode(i: Int) = (xLen, i) match { + case (32, 2) => 1 + case (64, x) if x >= 3 && x <= 6 => x + 5 + } + def write(envcfg: Envcfg, wdata: UInt): Unit = { + val new_envcfg = wdata.asTypeOf(new Envcfg) + envcfg.fiom := new_envcfg.fiom // only FIOM is writable currently + } + object CSR { + val busErrorIntCause = parameter.busErrorIntCause + val debugTriggerCause = parameter.debugTriggerCause + val firstHPM = parameter.firstHPM + val hpmWidth = parameter.hpmWidth + val debugIntCause = 
parameter.debugIntCause + val rnmiIntCause = parameter.rnmiIntCause + val rnmiBEUCause = parameter.rnmiBEUCause + val nHPM = parameter.nHPM + val firstHPE = parameter.firstHPE + val firstMHPC = parameter.firstMHPC + val firstHPC = parameter.firstHPC + val firstMHPCH = parameter.firstMHPCH + val firstHPCH = parameter.firstHPCH + val maxPMPs = parameter.maxPMPs + val N = parameter.N + val S = parameter.S + val C = parameter.C + val W = parameter.W + val I = parameter.I + def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) + def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB) + val modeLSB = parameter.modeLSB + val firstCtr = parameter.firstCtr + val nCtr = parameter.nCtr + val firstCtrH = parameter.firstCtrH + } + object coreParams { + val mcontextWidth = parameter.mcontextWidth + val scontextWidth = parameter.scontextWidth + val useRVE = parameter.useRVE + val haveBasicCounters = parameter.haveBasicCounters + val haveCFlush = parameter.haveCFlush + val misaWritable = parameter.misaWritable + val haveFSDirty = parameter.haveFSDirty + } + def inRange(x: UInt, base: UInt, bounds: UInt) = x >= base && x < bounds + + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + + def sextTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x) + } + + def padTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(0.U((n - x.getWidth).W), x) + } + + // a counter that clock gates most of its MSBs using the LSB carry-out + case class WideCounter(width: Int, inc: UInt = 1.U, reset: Boolean = true, inhibit: Bool = false.B) { + private val isWide = width > (2 * inc.getWidth) + private val smallWidth = if (isWide) inc.getWidth.max(log2Up(width)) else width + private val small = if (reset) RegInit(0.U(smallWidth.W)) else Reg(UInt(smallWidth.W)) + private val nextSmall = small +& inc + when(!inhibit) { small := nextSmall } + + private val large = if (isWide) { + val r = if (reset) RegInit(0.U((width - smallWidth).W)) else Reg(UInt((width - smallWidth).W)) + when(nextSmall(smallWidth) && !inhibit) { r := r + 1.U } + r + } else null + + val value = if (isWide) Cat(large, small) else small + lazy val carryOut = { + val lo = (small ^ nextSmall) >> 1 + if (!isWide) + lo + else { + val hi = Mux(nextSmall(smallWidth), large ^ (large +& 1.U), 0.U) >> 1 + Cat(hi, lo) + } + } + + def assign(x: UInt) = { + small := x + if (isWide) large := x >> smallWidth + } + } + + // end + + val vector = Option.when(usingVector)(new csr.V(vLen, usingHypervisor)) + + io.rwStall := false.B + + val reset_mstatus = WireDefault(0.U.asTypeOf(new MStatus())) + reset_mstatus.mpp := PRV.M.U + reset_mstatus.prv := PRV.M.U + reset_mstatus.xs := 0.U + val reg_mstatus = RegInit(reset_mstatus) + + val new_prv = WireDefault(reg_mstatus.prv) + reg_mstatus.prv := legalizePrivilege(new_prv) + + val reset_dcsr = WireDefault(0.U.asTypeOf(new DCSR())) + reset_dcsr.xdebugver := 1.U + reset_dcsr.prv := PRV.M.U + val reg_dcsr = RegInit(reset_dcsr) + + val (supported_interrupts, delegable_interrupts) = { + val sup = Wire(new MIP(nLocalInterrupts)) + sup.usip := false.B + sup.ssip := usingSupervisor.B + sup.vssip := usingHypervisor.B + sup.msip := true.B + sup.utip := false.B + sup.stip := usingSupervisor.B + sup.vstip := usingHypervisor.B + sup.mtip := true.B + sup.ueip := false.B + sup.seip := usingSupervisor.B + sup.vseip := usingHypervisor.B + sup.meip := true.B + sup.sgeip := false.B + sup.debug 
:= false.B + sup.zero1 := false.B + sup.lip.foreach { _ := true.B } + val supported_high_interrupts = + if (io.interrupts.buserror.nonEmpty && !usingNMI) (BigInt(1) << CSR.busErrorIntCause).U else 0.U + + val del = WireDefault(sup) + del.msip := false.B + del.mtip := false.B + del.meip := false.B + + (sup.asUInt | supported_high_interrupts, del.asUInt) + } + val delegable_base_exceptions = Seq( + Causes.misaligned_fetch, + Causes.fetch_page_fault, + Causes.breakpoint, + Causes.load_page_fault, + Causes.store_page_fault, + Causes.misaligned_load, + Causes.misaligned_store, + Causes.illegal_instruction, + Causes.user_ecall + ) + val delegable_hypervisor_exceptions = Seq( + Causes.virtual_supervisor_ecall, + Causes.fetch_guest_page_fault, + Causes.load_guest_page_fault, + Causes.virtual_instruction, + Causes.store_guest_page_fault + ) + val delegable_exceptions = ( + delegable_base_exceptions + ++ (if (usingHypervisor) delegable_hypervisor_exceptions else Seq()) + ).map(1 << _).sum.U + + val hs_delegable_exceptions = Seq( + Causes.misaligned_fetch, + Causes.fetch_access, + Causes.illegal_instruction, + Causes.breakpoint, + Causes.misaligned_load, + Causes.load_access, + Causes.misaligned_store, + Causes.store_access, + Causes.user_ecall, + Causes.fetch_page_fault, + Causes.load_page_fault, + Causes.store_page_fault + ).map(1 << _).sum.U + + val (hs_delegable_interrupts, mideleg_always_hs) = { + val always = WireDefault(0.U.asTypeOf(new MIP(nLocalInterrupts))) + always.vssip := usingHypervisor.B + always.vstip := usingHypervisor.B + always.vseip := usingHypervisor.B + + val deleg = WireDefault(always) + deleg.lip.foreach { _ := usingHypervisor.B } + + (deleg.asUInt, always.asUInt) + } + + val reg_debug = RegInit(false.B) + val reg_dpc = Reg(UInt(vaddrBitsExtended.W)) + val reg_dscratch0 = Reg(UInt(xLen.W)) + // val reg_dscratch1 = (p(DebugModuleKey).map(_.nDscratch).getOrElse(1) > 1).option(Reg(UInt(xLen.W))) + // @todo: optional, if debug base is not zero. 
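+ // Editorial sketch (not part of the original patch): the commented-out upstream form above makes the second debug scratch register optional, e.g. val reg_dscratch1 = Option.when(nDscratch > 1)(Reg(UInt(xLen.W))), where nDscratch is a hypothetical Debug Module parameter; this port simply always allocates it.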
+ val reg_dscratch1 = Reg(UInt(parameter.xLen.W)) + val reg_singleStepped = Reg(Bool()) + + val reg_mcontext = Option.when(coreParams.mcontextWidth > 0)(RegInit(0.U(coreParams.mcontextWidth.W))) + val reg_scontext = Option.when(coreParams.scontextWidth > 0)(RegInit(0.U(coreParams.scontextWidth.W))) + + val reg_tselect = Reg(UInt(log2Up(nBreakpoints).W)) + val reg_bp = Reg( + Vec( + 1 << log2Up(nBreakpoints), + new BP(xLen, useBPWatch, vaddrBits, coreParams.mcontextWidth, coreParams.scontextWidth) + ) + ) + + val reg_pmp = Reg(Vec(nPMPs, new PMPReg(paddrBits))) + + val reg_mie = Reg(UInt(xLen.W)) + val (reg_mideleg, read_mideleg) = { + val reg = Reg(UInt(xLen.W)) + (reg, Mux(usingSupervisor.B, reg & delegable_interrupts | mideleg_always_hs, 0.U)) + } + val (reg_medeleg, read_medeleg) = { + val reg = Reg(UInt(xLen.W)) + (reg, Mux(usingSupervisor.B, reg & delegable_exceptions, 0.U)) + } + val reg_mip = Reg(new MIP(nLocalInterrupts)) + val reg_mepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_mcause = RegInit(0.U(xLen.W)) + val reg_mtval = Reg(UInt(vaddrBitsExtended.W)) + val reg_mtval2 = Reg(UInt(((maxSVAddrBits + 1).min(xLen)).W)) + val reg_mscratch = Reg(Bits(xLen.W)) + val mtvecWidth = paddrBits.min(xLen) + val reg_mtvec = mtvecInit match { + case Some(addr) => RegInit(addr.U(mtvecWidth.W)) + case None => Reg(UInt(mtvecWidth.W)) + } + + val reset_mnstatus = WireDefault(0.U.asTypeOf(new MNStatus())) + reset_mnstatus.mpp := PRV.M.U + val reg_mnscratch = Reg(Bits(xLen.W)) + val reg_mnepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_mncause = RegInit(0.U(xLen.W)) + val reg_mnstatus = RegInit(reset_mnstatus) + val reg_rnmie = RegInit(true.B) + val nmie = reg_rnmie + + val reg_menvcfg = RegInit(0.U.asTypeOf(new Envcfg)) + val reg_senvcfg = RegInit(0.U.asTypeOf(new Envcfg)) + val reg_henvcfg = RegInit(0.U.asTypeOf(new Envcfg)) + + val delegable_counters = ((BigInt(1) << (nPerfCounters + CSR.firstHPM)) - 1).U + val (reg_mcounteren, read_mcounteren) = { + val reg = Reg(UInt(32.W)) + (reg, Mux(usingUser.B, reg & delegable_counters, 0.U)) + } + val (reg_scounteren, read_scounteren) = { + val reg = Reg(UInt(32.W)) + (reg, Mux(usingSupervisor.B, reg & delegable_counters, 0.U)) + } + + val (reg_hideleg: UInt, read_hideleg: UInt) = { + val reg = Reg(UInt(xLen.W)) + (reg, Mux(usingHypervisor.B, reg & hs_delegable_interrupts, 0.U)) + } + val (reg_hedeleg, read_hedeleg) = { + val reg = Reg(UInt(xLen.W)) + (reg, Mux(usingHypervisor.B, reg & hs_delegable_exceptions, 0.U)) + } + val hs_delegable_counters = delegable_counters + val (reg_hcounteren, read_hcounteren) = { + val reg = Reg(UInt(32.W)) + (reg, Mux(usingHypervisor.B, reg & hs_delegable_counters, 0.U)) + } + val reg_hstatus = RegInit(0.U.asTypeOf(new HStatus)) + val reg_hgatp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val reg_htval = Reg(reg_mtval2.cloneType) + val read_hvip = reg_mip.asUInt & hs_delegable_interrupts + val read_hie = reg_mie & hs_delegable_interrupts + + val (reg_vstvec, read_vstvec) = { + val reg = Reg(UInt(vaddrBitsExtended.W)) + (reg, sextTo(formTVec(reg), xLen)) + } + val reg_vsstatus = Reg(new MStatus) + val reg_vsscratch = Reg(Bits(xLen.W)) + val reg_vsepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_vscause = Reg(Bits(xLen.W)) + val reg_vstval = Reg(UInt(vaddrBitsExtended.W)) + val reg_vsatp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + + val reg_sepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_scause = Reg(Bits(xLen.W)) + val reg_stval = Reg(UInt(vaddrBitsExtended.W)) + val reg_sscratch = Reg(Bits(xLen.W)) + val reg_stvec = 
Reg(UInt((if (usingHypervisor) vaddrBitsExtended else vaddrBits).W)) + val reg_satp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val reg_wfi = withClock(io.ungatedClock) { RegInit(false.B) } + + val reg_fflags = Reg(UInt(5.W)) + val reg_frm = Reg(UInt(3.W)) + + val reg_mcountinhibit = RegInit(0.U((CSR.firstHPM + nPerfCounters).W)) + io.inhibitCycle := reg_mcountinhibit(0) + val reg_instret = WideCounter(64, io.retire, inhibit = reg_mcountinhibit(2)) + val reg_cycle = + if (enableCommitLog) WideCounter(64, io.retire, inhibit = reg_mcountinhibit(0)) + else withClock(io.ungatedClock) { WideCounter(64, !io.csrStall, inhibit = reg_mcountinhibit(0)) } + val reg_hpmevent = io.counters.map(c => RegInit(0.U(xLen.W))) + (io.counters.zip(reg_hpmevent)).foreach { case (c, e) => c.eventSel := e } + val reg_hpmcounter = io.counters.zipWithIndex.map { + case (c, i) => + WideCounter(CSR.hpmWidth, c.inc, reset = false, inhibit = reg_mcountinhibit(CSR.firstHPM + i)) + } + + val mip = WireDefault(reg_mip) + mip.lip := io.interrupts.tileInterrupts.lip + mip.mtip := io.interrupts.tileInterrupts.mtip + mip.msip := io.interrupts.tileInterrupts.msip + mip.meip := io.interrupts.tileInterrupts.meip + // seip is the OR of reg_mip.seip and the actual line from the PLIC + io.interrupts.tileInterrupts.seip.foreach { mip.seip := reg_mip.seip || _ } + // Similar logic would apply if the PLIC had a VSEIP line: + //io.interrupts.vseip.foreach { mip.vseip := reg_mip.vseip || _ } + val read_mip = mip.asUInt & supported_interrupts + val read_hip = read_mip & hs_delegable_interrupts + val high_interrupts = (if (usingNMI) 0.U else io.interrupts.buserror.map(_ << CSR.busErrorIntCause).getOrElse(0.U)) + + val pending_interrupts: UInt = high_interrupts | (read_mip & reg_mie) + val d_interrupts: UInt = io.interrupts.tileInterrupts.debug << CSR.debugIntCause + val (nmi_interrupts: UInt, nmiFlag: Bool) = io.interrupts.tileInterrupts.nmi + .map(nmi => + ( + ((nmi.rnmi && reg_rnmie) << CSR.rnmiIntCause) | + io.interrupts.buserror.map(_ << CSR.rnmiBEUCause).getOrElse(0.U), + !io.interrupts.tileInterrupts.debug && nmi.rnmi && reg_rnmie + ) + ) + .getOrElse((0.U, false.B)) + val m_interrupts = + Mux(nmie && (reg_mstatus.prv <= PRV.S.U || reg_mstatus.mie), ~(~pending_interrupts | read_mideleg), 0.U) + val s_interrupts = Mux( + nmie && (reg_mstatus.v || reg_mstatus.prv < PRV.S.U || (reg_mstatus.prv === PRV.S.U && reg_mstatus.sie)), + pending_interrupts & read_mideleg & ~read_hideleg, + 0.U + ) + val vs_interrupts = Mux( + nmie && (reg_mstatus.v && (reg_mstatus.prv < PRV.S.U || reg_mstatus.prv === PRV.S.U && reg_vsstatus.sie)), + pending_interrupts & read_hideleg, + 0.U + ) + val (anyInterrupt, whichInterrupt) = chooseInterrupt( + Seq(vs_interrupts, s_interrupts, m_interrupts, nmi_interrupts, d_interrupts) + ) + val interruptMSB = BigInt(1) << (xLen - 1) + val interruptCause: UInt = interruptMSB.U + ((nmiFlag << (xLen - 2)): UInt) + whichInterrupt + io.interrupt := (anyInterrupt && !io.singleStep || reg_singleStepped) && !(reg_debug || io.status.cease) + io.interruptCause := interruptCause + io.bp := reg_bp.take(nBreakpoints) + io.mcontext := reg_mcontext.getOrElse(0.U) + io.scontext := reg_scontext.getOrElse(0.U) + io.fiom := (reg_mstatus.prv < PRV.M.U && reg_menvcfg.fiom) || (reg_mstatus.prv < PRV.S.U && reg_senvcfg.fiom) || (reg_mstatus.v && reg_henvcfg.fiom) + def genPMP(reg: PMPReg): PMP = { + val pmp = Wire(new PMP(parameter.paddrBits)) + pmp.cfg := reg.cfg + pmp.addr := reg.addr + def computeMask(pmp: PMP) = { + val base = Cat(pmp.addr, pmp.cfg.a(0)) | ((parameter.pmpGranularity - 1).U >> PMP.lgAlign) + Cat(base & ~(base + 1.U), ((1 << PMP.lgAlign) - 1).U) + } + pmp.mask := computeMask(pmp) + pmp + }
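+ // Editorial note (sketch, not part of the original patch): for a NAPOT entry the low bits of pmpaddr hold a run of trailing ones encoding the region size, and base & ~(base + 1.U) isolates exactly that run. Worked example at 4-byte granularity: pmpaddr = 0b...0111 gives base = pmpaddr ## cfg.a(0) = 0b...01111; adding 1 clears the four trailing ones, so the computed mask keeps them and, together with the two appended lgAlign ones, marks address bits [5:0] as don't-care, i.e. a 64-byte region.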
io.pmp := reg_pmp.map(genPMP) + + val isaMaskString = + (if (usingMulDiv) "M" else "") + + (if (usingAtomics) "A" else "") + + (if (fLen >= 32) "F" else "") + + (if (fLen >= 64) "D" else "") + + (if (usingVector) "V" else "") + + (if (usingCompressed) "C" else "") + val isaString = (if (coreParams.useRVE) "E" else "I") + + isaMaskString + + (if (customIsaExt.isDefined) "X" else "") + + (if (usingSupervisor) "S" else "") + + (if (usingHypervisor) "H" else "") + + (if (usingUser) "U" else "") + val isaMax = (BigInt(log2Ceil(xLen) - 4) << (xLen - 2)) | isaStringToMask(isaString) + val reg_misa = RegInit(isaMax.U) + val read_mstatus = io.status.asUInt + val read_mtvec = padTo(formTVec(reg_mtvec), xLen) + val read_stvec = sextTo(formTVec(reg_stvec), xLen) + + val read_mapping = mutable.LinkedHashMap[Int, Bits]( + CSRs.tselect -> reg_tselect, + CSRs.tdata1 -> reg_bp(reg_tselect).control.asUInt, + CSRs.tdata2 -> sextTo(reg_bp(reg_tselect).address, xLen), + CSRs.tdata3 -> reg_bp(reg_tselect).textra.asUInt, + CSRs.misa -> reg_misa, + CSRs.mstatus -> read_mstatus, + CSRs.mtvec -> read_mtvec, + CSRs.mip -> read_mip, + CSRs.mie -> reg_mie, + CSRs.mscratch -> reg_mscratch, + CSRs.mepc -> sextTo(readEPC(reg_mepc), xLen), + CSRs.mtval -> sextTo(reg_mtval, xLen), + CSRs.mcause -> reg_mcause, + CSRs.mhartid -> io.hartid + ) + + val debug_csrs = + if (!usingDebug) mutable.LinkedHashMap[Int, Bits]() + else + mutable.LinkedHashMap[Int, Bits]( + CSRs.dcsr -> reg_dcsr.asUInt, + CSRs.dpc -> sextTo(readEPC(reg_dpc), xLen), + CSRs.dscratch0 -> reg_dscratch0.asUInt, + CSRs.dscratch1 -> reg_dscratch1.asUInt + ) + + val read_mnstatus = WireInit(0.U.asTypeOf(new MNStatus())) + read_mnstatus.mpp := reg_mnstatus.mpp + read_mnstatus.mpv := reg_mnstatus.mpv + read_mnstatus.mie := reg_rnmie + val nmi_csrs = + if (!usingNMI) mutable.LinkedHashMap[Int, Bits]() + else + mutable.LinkedHashMap[Int, Bits]( + CustomCSRs.mnscratch -> reg_mnscratch, + CustomCSRs.mnepc -> sextTo(readEPC(reg_mnepc), xLen), + CustomCSRs.mncause -> reg_mncause, + CustomCSRs.mnstatus -> read_mnstatus.asUInt + ) + + val context_csrs = mutable.LinkedHashMap[Int, Bits]() ++ + reg_mcontext.map(r => CSRs.mcontext -> r) ++ + reg_scontext.map(r => CSRs.scontext -> r) + + val read_fcsr = Cat(reg_frm, reg_fflags) + val fp_csrs = mutable.LinkedHashMap[Int, Bits]() ++ + Option.when(usingFPU)(CSRs.fflags -> reg_fflags) ++ + Option.when(usingFPU)(CSRs.frm -> reg_frm) ++ + Option.when(usingFPU)(CSRs.fcsr -> read_fcsr) + + read_mapping ++= debug_csrs + read_mapping ++= nmi_csrs + read_mapping ++= context_csrs + read_mapping ++= fp_csrs + + // Vector read CSR logic injection + vector.foreach { v => + read_mapping ++= mutable.LinkedHashMap[Int, Bits]( + CSRs.vxsat -> v.states("vxsat"), + CSRs.vxrm -> v.states("vxrm"), + CSRs.vcsr -> v.states("vxrm") ## v.states("vxsat"), + CSRs.vstart -> v.states("vstart"), + CSRs.vtype -> v.states("vill") ## 0.U(23.W) ## v.states("vma") ## v.states("vta") ## v.states("vsew") ## v.states("vlmul"), // {vill, 23'b0, vma, vta, vsew, vlmul}, matching the vtype layout assumed by T1Issue + CSRs.vl -> v.states("vl"), + CSRs.vlenb -> v.constants("vlenb") + ) + } + + if (coreParams.haveBasicCounters) { + read_mapping += CSRs.mcountinhibit -> reg_mcountinhibit + read_mapping += CSRs.mcycle -> reg_cycle.value + read_mapping += CSRs.minstret -> reg_instret.value + + for ( + ((e, c), i) <- (reg_hpmevent + .padTo(CSR.nHPM, 0.U) + .zip(reg_hpmcounter.map(x => 
x.value).padTo(CSR.nHPM, 0.U))) + .zipWithIndex + ) { + read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN + read_mapping += (i + CSR.firstMHPC) -> c // mhpmcounterN + read_mapping += (i + CSR.firstHPC) -> c // hpmcounterN + if (xLen == 32) { + read_mapping += (i + CSR.firstMHPCH) -> (c >> 32) // mhpmcounterNh + read_mapping += (i + CSR.firstHPCH) -> (c >> 32) // hpmcounterNh + } + } + + if (usingUser) { + read_mapping += CSRs.mcounteren -> read_mcounteren + } + read_mapping += CSRs.cycle -> reg_cycle.value + read_mapping += CSRs.instret -> reg_instret.value + + if (xLen == 32) { + read_mapping += CSRs.mcycleh -> (reg_cycle.value >> 32) + read_mapping += CSRs.minstreth -> (reg_instret.value >> 32) + read_mapping += CSRs.cycleh -> (reg_cycle.value >> 32) + read_mapping += CSRs.instreth -> (reg_instret.value >> 32) + } + } + + if (usingUser) { + read_mapping += CSRs.menvcfg -> reg_menvcfg.asUInt + if (xLen == 32) + read_mapping += CSRs.menvcfgh -> (reg_menvcfg.asUInt >> 32) + } + + val sie_mask = { + val sgeip_mask = WireInit(0.U.asTypeOf(new MIP(nLocalInterrupts))) + sgeip_mask.sgeip := true.B + read_mideleg & ~(hs_delegable_interrupts | sgeip_mask.asUInt) + } + if (usingSupervisor) { + val read_sie = reg_mie & sie_mask + val read_sip = read_mip & sie_mask + val read_sstatus = WireDefault(0.U.asTypeOf(new MStatus)) + read_sstatus.sd := io.status.sd + read_sstatus.uxl := io.status.uxl + read_sstatus.sd_rv32 := io.status.sd_rv32 + read_sstatus.mxr := io.status.mxr + read_sstatus.sum := io.status.sum + read_sstatus.xs := io.status.xs + read_sstatus.fs := io.status.fs + read_sstatus.vs := io.status.vs + read_sstatus.spp := io.status.spp + read_sstatus.spie := io.status.spie + read_sstatus.sie := io.status.sie + + read_mapping += CSRs.sstatus -> (read_sstatus.asUInt)(xLen - 1, 0) + read_mapping += CSRs.sip -> read_sip.asUInt + read_mapping += CSRs.sie -> read_sie.asUInt + read_mapping += CSRs.sscratch -> reg_sscratch + read_mapping += CSRs.scause -> reg_scause + read_mapping += CSRs.stval -> sextTo(reg_stval, xLen) + read_mapping += CSRs.satp -> reg_satp.asUInt + read_mapping += CSRs.sepc -> sextTo(readEPC(reg_sepc), xLen) + read_mapping += CSRs.stvec -> read_stvec + read_mapping += CSRs.scounteren -> read_scounteren + read_mapping += CSRs.mideleg -> read_mideleg + read_mapping += CSRs.medeleg -> read_medeleg + read_mapping += CSRs.senvcfg -> reg_senvcfg.asUInt + } + + val pmpCfgPerCSR = xLen / new PMPConfig().getWidth + def pmpCfgIndex(i: Int) = (xLen / 32) * (i / pmpCfgPerCSR) + if (reg_pmp.nonEmpty) { + require(reg_pmp.size <= CSR.maxPMPs) + // TODO: rc bug. 
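+ // Editorial note (sketch, not part of the original patch): the padTo below fills unimplemented PMP slots with an all-zeros PMPReg (cfg.a = OFF), so their pmpcfg/pmpaddr CSRs read as zero. Packing arithmetic: PMPConfig is one byte, so pmpCfgPerCSR = xLen / 8 (4 on RV32, 8 on RV64), and pmpCfgIndex scales by xLen / 32 because RV64 exposes only even-numbered pmpcfg CSRs.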
+ val read_pmp = reg_pmp.padTo(CSR.maxPMPs, 0.U.asTypeOf(new PMPReg(paddrBits))) + for (i <- 0 until read_pmp.size by pmpCfgPerCSR) + read_mapping += (CSRs.pmpcfg0 + pmpCfgIndex(i)) -> Cat( + read_pmp.map(_.cfg).slice(i, i + pmpCfgPerCSR).reverse.map(_.asUInt) + ).asUInt + for ((pmp, i) <- read_pmp.zipWithIndex) { + def pmpReadAddr(x: PMPReg) = if (log2Ceil(pmpGranularity) == PMP.lgAlign) x.addr + else { + val mask = ((BigInt(1) << (log2Ceil(pmpGranularity) - PMP.lgAlign)) - 1).U + Mux(PMP.napot(x), x.addr | (mask >> 1), ~(~x.addr | mask)) + } + read_mapping += (CSRs.pmpaddr0 + i) -> pmpReadAddr(pmp) + } + } + + // implementation-defined CSRs + def generateCustomCSR(csr: CustomCSR, csr_io: CustomCSRIO) = { + require(csr.mask >= 0 && csr.mask.bitLength <= xLen) + require(!read_mapping.contains(csr.id)) + val reg = csr.init.map(init => RegInit(init.U(xLen.W))).getOrElse(Reg(UInt(xLen.W))) + val read = io.rw.cmd =/= CSR.N && io.rw.addr === csr.id.U + csr_io.ren := read + when(read && csr_io.stall) { io.rwStall := true.B } + read_mapping += csr.id -> reg + reg + } + val reg_custom = customCSRs.zip(io.customCSRs).map(t => generateCustomCSR(t._1, t._2)) + + if (usingHypervisor) { + read_mapping += CSRs.mtinst -> 0.U + read_mapping += CSRs.mtval2 -> reg_mtval2 + + val read_hstatus = io.hstatus.asUInt(xLen - 1, 0) + + read_mapping += CSRs.hstatus -> read_hstatus + read_mapping += CSRs.hedeleg -> read_hedeleg + read_mapping += CSRs.hideleg -> read_hideleg + read_mapping += CSRs.hcounteren -> read_hcounteren + read_mapping += CSRs.hgatp -> reg_hgatp.asUInt + read_mapping += CSRs.hip -> read_hip + read_mapping += CSRs.hie -> read_hie + read_mapping += CSRs.hvip -> read_hvip + read_mapping += CSRs.hgeie -> 0.U + read_mapping += CSRs.hgeip -> 0.U + read_mapping += CSRs.htval -> reg_htval + read_mapping += CSRs.htinst -> 0.U + read_mapping += CSRs.henvcfg -> reg_henvcfg.asUInt + if (xLen == 32) + read_mapping += CSRs.henvcfgh -> (reg_henvcfg.asUInt >> 32) + + val read_vsie = (read_hie & read_hideleg) >> 1 + val read_vsip = (read_hip & read_hideleg) >> 1 + val read_vsepc = sextTo(readEPC(reg_vsepc), xLen) + val read_vstval = sextTo(reg_vstval, xLen) + val read_vsstatus = io.gstatus.asUInt(xLen - 1, 0) + + read_mapping += CSRs.vsstatus -> read_vsstatus + read_mapping += CSRs.vsip -> read_vsip + read_mapping += CSRs.vsie -> read_vsie + read_mapping += CSRs.vsscratch -> reg_vsscratch + read_mapping += CSRs.vscause -> reg_vscause + read_mapping += CSRs.vstval -> read_vstval + read_mapping += CSRs.vsatp -> reg_vsatp.asUInt + read_mapping += CSRs.vsepc -> read_vsepc + read_mapping += CSRs.vstvec -> read_vstvec + } + + // mimpid, marchid, mvendorid, and mconfigptr are 0 unless overridden by customCSRs + Seq(CSRs.mimpid, CSRs.marchid, CSRs.mvendorid, CSRs.mconfigptr).foreach(id => read_mapping.getOrElseUpdate(id, 0.U)) + + val decoded_addr = { + val addr = Cat(io.status.v, io.rw.addr) + val pats = + for (((k, _), i) <- read_mapping.zipWithIndex) + yield (BitPat(k.U), (0 until read_mapping.size).map(j => BitPat((i == j).B))) + val decoded = DecodeLogic(addr, Seq.fill(read_mapping.size)(X), pats) + val unvirtualized_mapping = (for (((k, _), v) <- read_mapping.zip(decoded)) yield k -> v.asBool).toMap + + for ((k, v) <- unvirtualized_mapping) yield k -> { + val alt = CSR.mode(k) match { + case PRV.S => unvirtualized_mapping.lift(k + (1 << CSR.modeLSB)) + case PRV.H => unvirtualized_mapping.lift(k - (1 << CSR.modeLSB)) + case _ => None + } + alt.map(Mux(reg_mstatus.v, _, v)).getOrElse(v) + } + } + + val wdata = 
readModifyWriteCSR(io.rw.cmd, io.rw.rdata, io.rw.wdata) + + val system_insn = io.rw.cmd === CSR.I + val hlsv = Seq(HLV_B, HLV_BU, HLV_H, HLV_HU, HLV_W, HLV_WU, HLV_D, HSV_B, HSV_H, HSV_W, HSV_D, HLVX_HU, HLVX_WU) + val decode_table = Seq( + ECALL -> List(Y, N, N, N, N, N, N, N, N), + EBREAK -> List(N, Y, N, N, N, N, N, N, N), + MRET -> List(N, N, Y, N, N, N, N, N, N), + CEASE -> List(N, N, N, Y, N, N, N, N, N), + WFI -> List(N, N, N, N, Y, N, N, N, N) + ) ++ + Option.when(usingDebug)(DRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + Option.when(usingNMI)(MNRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + Option.when(coreParams.haveCFlush)(CFLUSH_D_L1 -> List(N, N, N, N, N, N, N, N, N)) ++ + Option.when(usingSupervisor)(SRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + Option.when(usingVM)(SFENCE_VMA -> List(N, N, N, N, N, Y, N, N, N)) ++ + Option.when(usingHypervisor)(HFENCE_VVMA -> List(N, N, N, N, N, N, Y, N, N)) ++ + Option.when(usingHypervisor)(HFENCE_GVMA -> List(N, N, N, N, N, N, N, Y, N)) ++ + (if (usingHypervisor) hlsv.map(_ -> List(N, N, N, N, N, N, N, N, Y)) else Seq()) + val insn_call :: insn_break :: insn_ret :: insn_cease :: insn_wfi :: _ :: _ :: _ :: _ :: Nil = { + val insn = ECALL.value.U | (io.rw.addr << 20) + DecodeLogic(insn, decode_table(0)._2.map(x => X), decode_table).map(system_insn && _.asBool) + } + + for (io_dec <- io.decode) { + val addr = io_dec.inst(31, 20) + + def decodeAny(m: mutable.LinkedHashMap[Int, Bits]): Bool = + m.map { case (k: Int, _: Bits) => addr === k.U }.reduce(_ || _) + def decodeFast(s: Seq[Int]): Bool = DecodeLogic(addr, s.map(_.U), (read_mapping -- s).keys.toList.map(_.U)) + + val _ :: is_break :: is_ret :: _ :: is_wfi :: is_sfence :: is_hfence_vvma :: is_hfence_gvma :: is_hlsv :: Nil = + DecodeLogic(io_dec.inst, decode_table(0)._2.map(x => X), decode_table).map(_.asBool) + val is_counter = inRange(addr, CSR.firstCtr.U, (CSR.firstCtr + CSR.nCtr).U) || inRange( + addr, + CSR.firstCtrH.U, + (CSR.firstCtrH + CSR.nCtr).U + ) + + val allow_wfi = + (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !reg_mstatus.tw && (!reg_mstatus.v || !reg_hstatus.vtw) + val allow_sfence_vma = + (!usingVM).B || reg_mstatus.prv > PRV.S.U || !Mux(reg_mstatus.v, reg_hstatus.vtvm, reg_mstatus.tvm) + val allow_hfence_vvma = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U) + val allow_hlsv = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U || reg_hstatus.hu) + val allow_sret = + (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !Mux(reg_mstatus.v, reg_hstatus.vtsr, reg_mstatus.tsr) + val counter_addr = addr(log2Ceil(read_mcounteren.getWidth) - 1, 0) + val allow_counter = (reg_mstatus.prv > PRV.S.U || read_mcounteren(counter_addr)) && + (!usingSupervisor.B || reg_mstatus.prv >= PRV.S.U || read_scounteren(counter_addr)) && + (!usingHypervisor.B || !reg_mstatus.v || read_hcounteren(counter_addr)) + io_dec.fpIllegal := io.status.fs === 0.U || reg_mstatus.v && reg_vsstatus.fs === 0.U || !reg_misa('f' - 'a') + io_dec.fpCsr := decodeFast(fp_csrs.keys.toList) + val csr_addr_legal = reg_mstatus.prv >= CSR.mode(addr) || + usingHypervisor.B && !reg_mstatus.v && reg_mstatus.prv === PRV.S.U && CSR.mode(addr) === PRV.H.U + val csr_exists = decodeAny(read_mapping) + io_dec.readIllegal := !csr_addr_legal || + !csr_exists || + ((addr === CSRs.satp.U || addr === CSRs.hgatp.U) && !allow_sfence_vma) || + is_counter && !allow_counter || + decodeFast(debug_csrs.keys.toList) && !reg_debug || + io_dec.fpCsr && io_dec.fpIllegal || + // a vector CSR read is illegal when the address decodes to a vector CSR while vector state is off (mstatus.VS = 0) and misa.V is clear + vector + .map(vector => + decodeFast(Seq(CSRs.vxsat, CSRs.vxrm, CSRs.vcsr, CSRs.vstart, CSRs.vtype, CSRs.vl, CSRs.vlenb)) && + vector.states("mstatus.VS") === 0.U && + !reg_misa('v' - 'a') + ) + .getOrElse(false.B)
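+ // Editorial note (sketch, not part of the original patch): per the privileged spec, CSR address bits [11:10] encode accessibility (0b11 means read-only, which is what the andR below tests) and bits [9:8] encode the lowest privilege level that may access the CSR, matching modeLSB = 8 in CSR.mode above.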
io_dec.writeIllegal := addr(11, 10).andR + io_dec.writeFlush := { + val addr_m = addr | (PRV.M.U << CSR.modeLSB) + !(addr_m >= CSRs.mscratch.U && addr_m <= CSRs.mtval.U) + } + io_dec.systemIllegal := !csr_addr_legal && !is_hlsv || + is_wfi && !allow_wfi || + is_ret && !allow_sret || + is_ret && addr(10) && addr(7) && !reg_debug || + (is_sfence || is_hfence_gvma) && !allow_sfence_vma || + is_hfence_vvma && !allow_hfence_vvma || + is_hlsv && !allow_hlsv + + io_dec.virtualAccessIllegal := reg_mstatus.v && csr_exists && (CSR.mode(addr) === PRV.H.U || + is_counter && read_mcounteren(counter_addr) && (!read_hcounteren(counter_addr) || !reg_mstatus.prv( + 0 + ) && !read_scounteren(counter_addr)) || + CSR.mode(addr) === PRV.S.U && !reg_mstatus.prv(0) || + addr === CSRs.satp.U && reg_mstatus.prv(0) && reg_hstatus.vtvm) + + io_dec.virtualSystemIllegal := reg_mstatus.v && (is_hfence_vvma || + is_hfence_gvma || + is_hlsv || + is_wfi && (!reg_mstatus.prv(0) || !reg_mstatus.tw && reg_hstatus.vtw) || + is_ret && CSR.mode(addr) === PRV.S.U && (!reg_mstatus.prv(0) || reg_hstatus.vtsr) || + is_sfence && (!reg_mstatus.prv(0) || reg_hstatus.vtvm)) + } + + val cause: UInt = + Mux( + insn_call, + Causes.user_ecall.U + Mux(reg_mstatus.prv(0) && reg_mstatus.v, PRV.H.U, reg_mstatus.prv), + Mux[UInt](insn_break, Causes.breakpoint.U, io.cause) + ) + val cause_lsbs = cause(log2Ceil(1 + CSR.busErrorIntCause) - 1, 0) + val causeIsDebugInt = cause(xLen - 1) && cause_lsbs === CSR.debugIntCause.U + val causeIsDebugTrigger = !cause(xLen - 1) && cause_lsbs === CSR.debugTriggerCause.U + val causeIsDebugBreak = + !cause(xLen - 1) && insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)( + reg_mstatus.prv + ) + val trapToDebug = + usingDebug.B && (reg_singleStepped || causeIsDebugInt || causeIsDebugTrigger || causeIsDebugBreak || reg_debug) + val debugEntry = parameter.debugEntry.getOrElse(BigInt(0x800)) + val debugException = parameter.debugException.getOrElse(BigInt(0x808)) + val debugTVec = Mux(reg_debug, Mux(insn_break, debugEntry.U, debugException.U), debugEntry.U) + val delegate = usingSupervisor.B && reg_mstatus.prv <= PRV.S.U && Mux( + cause(xLen - 1), + read_mideleg(cause_lsbs), + read_medeleg(cause_lsbs) + ) + val delegateVS = reg_mstatus.v && delegate && Mux(cause(xLen - 1), read_hideleg(cause_lsbs), read_hedeleg(cause_lsbs)) + def mtvecBaseAlign = 2 + def mtvecInterruptAlign = { + require(reg_mip.getWidth <= xLen) + log2Ceil(xLen) + } + val notDebugTVec = { + val base = Mux(delegate, Mux(delegateVS, read_vstvec, read_stvec), read_mtvec) + val interruptOffset = cause(mtvecInterruptAlign - 1, 0) << mtvecBaseAlign + val interruptVec = Cat(base >> (mtvecInterruptAlign + mtvecBaseAlign), interruptOffset) + val doVector = base(0) && cause(cause.getWidth - 1) && (cause_lsbs >> mtvecInterruptAlign) === 0.U + Mux(doVector, interruptVec, base >> mtvecBaseAlign << mtvecBaseAlign) + } + + val causeIsRnmiInt = + cause(xLen - 1) && cause(xLen - 2) && (cause_lsbs === CSR.rnmiIntCause.U || cause_lsbs === CSR.rnmiBEUCause.U) + val causeIsRnmiBEU = cause(xLen - 1) && cause(xLen - 2) && cause_lsbs === CSR.rnmiBEUCause.U + val causeIsNmi = causeIsRnmiInt + val nmiTVecInt = io.interrupts.tileInterrupts.nmi.map(nmi => 
nmi.rnmi_interrupt_vector).getOrElse(0.U) + val nmiTVecXcpt = io.interrupts.tileInterrupts.nmi.map(nmi => nmi.rnmi_exception_vector).getOrElse(0.U) + val trapToNmiInt = usingNMI.B && causeIsNmi + val trapToNmiXcpt = usingNMI.B && !nmie + val trapToNmi = trapToNmiInt || trapToNmiXcpt + val nmiTVec = (Mux(causeIsNmi, nmiTVecInt, nmiTVecXcpt) >> 1) << 1 + + val tvec = Mux(trapToDebug, debugTVec, Mux(trapToNmi, nmiTVec, notDebugTVec)) + io.evec := tvec + io.ptbr := reg_satp + io.hgatp := reg_hgatp + io.vsatp := reg_vsatp + io.eret := insn_call || insn_break || insn_ret + io.singleStep := reg_dcsr.step && !reg_debug + io.status := reg_mstatus + io.status.sd := io.status.fs.andR || io.status.xs.andR || io.status.vs.andR + io.status.debug := reg_debug + io.status.isa := reg_misa + io.status.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U + io.status.sxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U + io.status.dprv := Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpp, reg_mstatus.prv) + io.status.dv := reg_mstatus.v || Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpv, false.B) + io.status.sd_rv32 := (xLen == 32).B && io.status.sd + io.status.mpv := reg_mstatus.mpv + io.status.gva := reg_mstatus.gva + io.status.vs := vector.map(vector => vector.states("mstatus.VS")).getOrElse(0.U(2.W)) // 2-bit VS value; bundle packing places it at mstatus[10:9] + io.hstatus := reg_hstatus + io.hstatus.vsxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U + io.gstatus := reg_vsstatus + io.gstatus.sd := io.gstatus.fs.andR || io.gstatus.xs.andR || io.gstatus.vs.andR + io.gstatus.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U + io.gstatus.sd_rv32 := (xLen == 32).B && io.gstatus.sd + + val exception = insn_call || insn_break || io.exception + assert( + PopCount(insn_ret :: insn_call :: insn_break :: io.exception :: Nil) <= 1.U, + "these conditions must be mutually exclusive" + ) + + when(insn_wfi && !io.singleStep && !reg_debug) { reg_wfi := true.B } + when(pending_interrupts.orR || io.interrupts.tileInterrupts.debug || exception) { reg_wfi := false.B } + io.interrupts.tileInterrupts.nmi.foreach(nmi => when(nmi.rnmi) { reg_wfi := false.B }) + + when(io.retire(0) || exception) { reg_singleStepped := true.B } + when(!io.singleStep) { reg_singleStepped := false.B } + assert(!io.singleStep || io.retire <= 1.U) + assert(!reg_singleStepped || io.retire === 0.U) + + val epc = formEPC(io.pc) + val tval = Mux(insn_break, epc, io.tval) + + when(exception) { + when(trapToDebug) { + when(!reg_debug) { + reg_mstatus.v := false.B + reg_debug := true.B + reg_dpc := epc + reg_dcsr.cause := Mux( + reg_singleStepped, + 4.U, + Mux(causeIsDebugInt, 3.U, Mux[UInt](causeIsDebugTrigger, 2.U, 1.U)) + ) + reg_dcsr.prv := trimPrivilege(reg_mstatus.prv) + reg_dcsr.v := reg_mstatus.v + new_prv := PRV.M.U + } + }.elsewhen(trapToNmiInt) { + when(reg_rnmie) { + reg_mstatus.v := false.B + reg_mnstatus.mpv := reg_mstatus.v + reg_rnmie := false.B + reg_mnepc := epc + reg_mncause := (BigInt(1) << (xLen - 1)).U | Mux(causeIsRnmiBEU, 3.U, 2.U) + reg_mnstatus.mpp := trimPrivilege(reg_mstatus.prv) + new_prv := PRV.M.U + } + }.elsewhen(delegateVS && nmie) { + reg_mstatus.v := true.B + reg_vsstatus.spp := reg_mstatus.prv + reg_vsepc := epc + reg_vscause := Mux(cause(xLen - 1), Cat(cause(xLen - 1, 2), 1.U(2.W)), cause) + reg_vstval := tval + reg_vsstatus.spie := reg_vsstatus.sie + reg_vsstatus.sie := false.B + new_prv := PRV.S.U + }.elsewhen(delegate && nmie) { + reg_mstatus.v := false.B + reg_hstatus.spvp := Mux(reg_mstatus.v, reg_mstatus.prv(0), reg_hstatus.spvp) + reg_hstatus.gva := 
io.gva + reg_hstatus.spv := reg_mstatus.v + reg_sepc := epc + reg_scause := cause + reg_stval := tval + reg_htval := io.htval + reg_mstatus.spie := reg_mstatus.sie + reg_mstatus.spp := reg_mstatus.prv + reg_mstatus.sie := false.B + new_prv := PRV.S.U + }.otherwise { + reg_mstatus.v := false.B + reg_mstatus.mpv := reg_mstatus.v + reg_mstatus.gva := io.gva + reg_mepc := epc + reg_mcause := cause + reg_mtval := tval + reg_mtval2 := io.htval + reg_mstatus.mpie := reg_mstatus.mie + reg_mstatus.mpp := trimPrivilege(reg_mstatus.prv) + reg_mstatus.mie := false.B + new_prv := PRV.M.U + } + } + + for (i <- 0 until supported_interrupts.getWidth) { + val en = + exception && (supported_interrupts & (BigInt(1) << i).U) =/= 0.U && cause === (BigInt(1) << (xLen - 1)).U + i.U + val delegable = (delegable_interrupts & (BigInt(1) << i).U) =/= 0.U + // property.cover(en && !delegate, s"INTERRUPT_M_$i") + // property.cover(en && delegable && delegate, s"INTERRUPT_S_$i") + } + for (i <- 0 until xLen) { + val supported_exceptions: BigInt = 0x8fe | + (if (usingCompressed && !coreParams.misaWritable) 0 else 1) | + (if (usingUser) 0x100 else 0) | + (if (usingSupervisor) 0x200 else 0) | + (if (usingVM) 0xb000 else 0) + if (((supported_exceptions >> i) & 1) != 0) { + val en = exception && cause === i.U + val delegable = (delegable_exceptions & (BigInt(1) << i).U) =/= 0.U + // property.cover(en && !delegate, s"EXCEPTION_M_$i") + // property.cover(en && delegable && delegate, s"EXCEPTION_S_$i") + } + } + + when(insn_ret) { + val ret_prv = WireInit(UInt(), DontCare) + when(usingSupervisor.B && !io.rw.addr(9)) { + when(!reg_mstatus.v) { + reg_mstatus.sie := reg_mstatus.spie + reg_mstatus.spie := true.B + reg_mstatus.spp := PRV.U.U + ret_prv := reg_mstatus.spp + reg_mstatus.v := usingHypervisor.B && reg_hstatus.spv + io.evec := readEPC(reg_sepc) + reg_hstatus.spv := false.B + }.otherwise { + reg_vsstatus.sie := reg_vsstatus.spie + reg_vsstatus.spie := true.B + reg_vsstatus.spp := PRV.U.U + ret_prv := reg_vsstatus.spp + reg_mstatus.v := usingHypervisor.B + io.evec := readEPC(reg_vsepc) + } + }.elsewhen(usingDebug.B && io.rw.addr(10) && io.rw.addr(7)) { + ret_prv := reg_dcsr.prv + reg_mstatus.v := usingHypervisor.B && reg_dcsr.v && reg_dcsr.prv <= PRV.S.U + reg_debug := false.B + io.evec := readEPC(reg_dpc) + }.elsewhen(usingNMI.B && io.rw.addr(10) && !io.rw.addr(7)) { + ret_prv := reg_mnstatus.mpp + reg_mstatus.v := usingHypervisor.B && reg_mnstatus.mpv && reg_mnstatus.mpp <= PRV.S.U + reg_rnmie := true.B + io.evec := readEPC(reg_mnepc) + }.otherwise { + reg_mstatus.mie := reg_mstatus.mpie + reg_mstatus.mpie := true.B + reg_mstatus.mpp := legalizePrivilege(PRV.U.U) + reg_mstatus.mpv := false.B + ret_prv := reg_mstatus.mpp + reg_mstatus.v := usingHypervisor.B && reg_mstatus.mpv && reg_mstatus.mpp <= PRV.S.U + io.evec := readEPC(reg_mepc) + } + + new_prv := ret_prv + when(usingUser.B && ret_prv <= PRV.S.U) { + reg_mstatus.mprv := false.B + } + } + + io.time := reg_cycle.value + io.csrStall := reg_wfi || io.status.cease + io.status.cease := RegEnable(true.B, false.B, insn_cease) + io.status.wfi := reg_wfi + + for ((io, reg) <- io.customCSRs.zip(reg_custom)) { + io.wen := false.B + io.wdata := wdata + io.value := reg + } + + val setVlReadData: UInt = Wire(UInt(xLen.W)) + io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v).asUInt | setVlReadData + + // cover access to register + val coverable_counters = read_mapping.filterNot { + case (k, _) => + k >= CSR.firstHPC + nPerfCounters && k < CSR.firstHPC + 
CSR.nHPM + } +// coverable_counters.foreach({ +// case (k, v) => { +// when(!k.U(11, 10).andR) { // Cover points for RW CSR registers +// property.cover( +// io.rw.cmd.isOneOf(CSR.W, CSR.S, CSR.C) && io.rw.addr === k.U, +// "CSR_access_" + k.toString, +// "Cover Accessing Core CSR field" +// ) +// }.otherwise { // Cover points for RO CSR registers +// property.cover( +// io.rw.cmd === CSR.R && io.rw.addr === k.U, +// "CSR_access_" + k.toString, +// "Cover Accessing Core CSR field" +// ) +// } +// } +// }) + + val set_fs_dirty = WireDefault(io.setFsDirty.getOrElse(false.B)) + if (coreParams.haveFSDirty) { + when(set_fs_dirty) { + assert(reg_mstatus.fs > 0.U) + when(reg_mstatus.v) { reg_vsstatus.fs := 3.U } + reg_mstatus.fs := 3.U + } + } + + io.fcsrRm := reg_frm + when(io.fcsrFlags.valid) { + reg_fflags := reg_fflags | io.fcsrFlags.bits + set_fs_dirty := true.B + } + + val csr_wen = isOneOf(io.rw.cmd, Seq(CSR.S, CSR.C, CSR.W)) && !io.rwStall + io.csrwCounter := Mux( + coreParams.haveBasicCounters.B && csr_wen && (inRange( + io.rw.addr, + CSRs.mcycle.U, + (CSRs.mcycle + CSR.nCtr).U + ) || inRange( + io.rw.addr, + CSRs.mcycleh.U, + (CSRs.mcycleh + CSR.nCtr).U + )), + UIntToOH(io.rw.addr(log2Ceil(CSR.nCtr + nPerfCounters) - 1, 0)), + 0.U + ) + when(csr_wen) { + val scause_mask = ((BigInt(1) << (xLen - 1)) + 31).U /* only implement 5 LSBs and MSB */ + + val satp_valid_modes = 0 +: (minPgLevels to pgLevels).map(pgLevelsToMode) + + when(decoded_addr(CSRs.mstatus)) { + val new_mstatus = wdata.asTypeOf(new MStatus()) + reg_mstatus.mie := new_mstatus.mie + reg_mstatus.mpie := new_mstatus.mpie + + if (usingUser) { + reg_mstatus.mprv := new_mstatus.mprv + reg_mstatus.mpp := legalizePrivilege(new_mstatus.mpp) + if (usingSupervisor) { + reg_mstatus.spp := new_mstatus.spp + reg_mstatus.spie := new_mstatus.spie + reg_mstatus.sie := new_mstatus.sie + reg_mstatus.tw := new_mstatus.tw + reg_mstatus.tsr := new_mstatus.tsr + } + if (usingVM) { + reg_mstatus.mxr := new_mstatus.mxr + reg_mstatus.sum := new_mstatus.sum + reg_mstatus.tvm := new_mstatus.tvm + } + if (usingHypervisor) { + reg_mstatus.mpv := new_mstatus.mpv + reg_mstatus.gva := new_mstatus.gva + } + } + + if (usingSupervisor || usingFPU) reg_mstatus.fs := formFS(new_mstatus.fs) + + vector.foreach(vector => vector.states("mstatus.VS") := new_mstatus.vs) + } + when(decoded_addr(CSRs.misa)) { + val mask = isaStringToMask(isaMaskString).U(xLen.W) + val f = wdata('f' - 'a') + // suppress write if it would cause the next fetch to be misaligned + when(!usingCompressed.B || !io.pc(1) || wdata('c' - 'a')) { + if (coreParams.misaWritable) + reg_misa := ~(~wdata | (!f << ('d' - 'a'))) & mask | reg_misa & ~mask + } + } + when(decoded_addr(CSRs.mip)) { + // MIP should be modified based on the value in reg_mip, not the value + // in read_mip, since read_mip.seip is the OR of reg_mip.seip and + // io.interrupts.seip. We don't want the value on the PLIC line to + // inadvertently be OR'd into read_mip.seip. 
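+ // Editorial sketch (not part of the original patch): readModifyWriteCSR, a helper assumed from elsewhere in this file, implements the standard CSRRW/CSRRS/CSRRC combine, conceptually Mux(cmd === CSR.W, wdata, Mux(cmd === CSR.S, old | wdata, old & ~wdata)); here it is applied to reg_mip.asUInt rather than read_mip for the reason given above.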
+ val new_mip = readModifyWriteCSR(io.rw.cmd, reg_mip.asUInt, io.rw.wdata).asTypeOf(new MIP(nLocalInterrupts)) + if (usingSupervisor) { + reg_mip.ssip := new_mip.ssip + reg_mip.stip := new_mip.stip + reg_mip.seip := new_mip.seip + } + if (usingHypervisor) { + reg_mip.vssip := new_mip.vssip + } + } + when(decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts } + when(decoded_addr(CSRs.mepc)) { reg_mepc := formEPC(wdata) } + when(decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } + if (mtvecWritable) + when(decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata } + when(decoded_addr(CSRs.mcause)) { + reg_mcause := wdata & ((BigInt(1) << (xLen - 1)) + (BigInt(1) << whichInterrupt.getWidth) - 1).U + } + when(decoded_addr(CSRs.mtval)) { reg_mtval := wdata } + + if (usingNMI) { + val new_mnstatus = wdata.asTypeOf(new MNStatus()) + when(decoded_addr(CustomCSRs.mnscratch)) { reg_mnscratch := wdata } + when(decoded_addr(CustomCSRs.mnepc)) { reg_mnepc := formEPC(wdata) } + when(decoded_addr(CustomCSRs.mncause)) { reg_mncause := wdata & ((BigInt(1) << (xLen - 1)) + BigInt(3)).U } + when(decoded_addr(CustomCSRs.mnstatus)) { + reg_mnstatus.mpp := legalizePrivilege(new_mnstatus.mpp) + reg_mnstatus.mpv := usingHypervisor.B && new_mnstatus.mpv + reg_rnmie := reg_rnmie | new_mnstatus.mie // mnie bit settable but not clearable from software + } + } + + for (((e, c), i) <- (reg_hpmevent.zip(reg_hpmcounter)).zipWithIndex) { + writeCounter(i + CSR.firstMHPC, c, wdata) + when(decoded_addr(i + CSR.firstHPE)) { e := perfEventSets.maskEventSelector(wdata) } + } + if (coreParams.haveBasicCounters) { + when(decoded_addr(CSRs.mcountinhibit)) { + reg_mcountinhibit := wdata & ~2.U(xLen.W) + } // mcountinhibit bit [1] is tied zero + writeCounter(CSRs.mcycle, reg_cycle, wdata) + writeCounter(CSRs.minstret, reg_instret, wdata) + } + + if (usingFPU) { + when(decoded_addr(CSRs.fflags)) { set_fs_dirty := true.B; reg_fflags := wdata } + when(decoded_addr(CSRs.frm)) { set_fs_dirty := true.B; reg_frm := wdata } + when(decoded_addr(CSRs.fcsr)) { + set_fs_dirty := true.B + reg_fflags := wdata + reg_frm := wdata >> reg_fflags.getWidth + } + } + if (usingDebug) { + when(decoded_addr(CSRs.dcsr)) { + val new_dcsr = wdata.asTypeOf(new DCSR()) + reg_dcsr.step := new_dcsr.step + reg_dcsr.ebreakm := new_dcsr.ebreakm + if (usingSupervisor) reg_dcsr.ebreaks := new_dcsr.ebreaks + if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku + if (usingUser) reg_dcsr.prv := legalizePrivilege(new_dcsr.prv) + if (usingHypervisor) reg_dcsr.v := new_dcsr.v + } + when(decoded_addr(CSRs.dpc)) { reg_dpc := formEPC(wdata) } + when(decoded_addr(CSRs.dscratch0)) { reg_dscratch0 := wdata } + // reg_dscratch1.foreach { r => + // when(decoded_addr(CSRs.dscratch1)) { r := wdata } + // } + when(decoded_addr(CSRs.dscratch1)) { reg_dscratch1 := wdata } + } + if (usingSupervisor) { + when(decoded_addr(CSRs.sstatus)) { + val new_sstatus = wdata.asTypeOf(new MStatus()) + reg_mstatus.sie := new_sstatus.sie + reg_mstatus.spie := new_sstatus.spie + reg_mstatus.spp := new_sstatus.spp + reg_mstatus.fs := formFS(new_sstatus.fs) + if (usingVM) { + reg_mstatus.mxr := new_sstatus.mxr + reg_mstatus.sum := new_sstatus.sum + } + } + when(decoded_addr(CSRs.sip)) { + val new_sip = ((read_mip & ~read_mideleg) | (wdata & read_mideleg)).asTypeOf(new MIP(nLocalInterrupts)) + reg_mip.ssip := new_sip.ssip + } + when(decoded_addr(CSRs.satp)) { + if (usingVM) { + val new_satp = wdata.asTypeOf(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + when(isOneOf(new_satp.mode, 
satp_valid_modes.map(_.U))) { + reg_satp.mode := new_satp.mode & satp_valid_modes.reduce(_ | _).U + reg_satp.ppn := new_satp.ppn(ppnBits - 1, 0) + if (asIdBits > 0) reg_satp.asid := new_satp.asid(asIdBits - 1, 0) + } + } + } + when(decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~sie_mask) | (wdata & sie_mask) } + when(decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } + when(decoded_addr(CSRs.sepc)) { reg_sepc := formEPC(wdata) } + when(decoded_addr(CSRs.stvec)) { reg_stvec := wdata } + when(decoded_addr(CSRs.scause)) { reg_scause := wdata & scause_mask } + when(decoded_addr(CSRs.stval)) { reg_stval := wdata } + when(decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata } + when(decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata } + when(decoded_addr(CSRs.scounteren)) { reg_scounteren := wdata } + when(decoded_addr(CSRs.senvcfg)) { write(reg_senvcfg, wdata) } + } + + if (usingHypervisor) { + when(decoded_addr(CSRs.hstatus)) { + val new_hstatus = wdata.asTypeOf(new HStatus()) + reg_hstatus.gva := new_hstatus.gva + reg_hstatus.spv := new_hstatus.spv + reg_hstatus.spvp := new_hstatus.spvp + reg_hstatus.hu := new_hstatus.hu + reg_hstatus.vtvm := new_hstatus.vtvm + reg_hstatus.vtw := new_hstatus.vtw + reg_hstatus.vtsr := new_hstatus.vtsr + reg_hstatus.vsxl := new_hstatus.vsxl + } + when(decoded_addr(CSRs.hideleg)) { reg_hideleg := wdata } + when(decoded_addr(CSRs.hedeleg)) { reg_hedeleg := wdata } + when(decoded_addr(CSRs.hgatp)) { + val new_hgatp = wdata.asTypeOf(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val valid_modes = 0 +: (minPgLevels to pgLevels).map(pgLevelsToMode) + when(isOneOf(new_hgatp.mode, valid_modes.map(_.U))) { + reg_hgatp.mode := new_hgatp.mode & valid_modes.reduce(_ | _).U + } + reg_hgatp.ppn := Cat(new_hgatp.ppn(ppnBits - 1, 2), 0.U(2.W)) + if (vmIdBits > 0) reg_hgatp.asid := new_hgatp.asid(vmIdBits - 1, 0) + } + when(decoded_addr(CSRs.hip)) { + val new_hip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)) + .asTypeOf(new MIP(nLocalInterrupts)) + reg_mip.vssip := new_hip.vssip + } + when(decoded_addr(CSRs.hie)) { + reg_mie := (reg_mie & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts) + } + when(decoded_addr(CSRs.hvip)) { + val new_sip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)) + .asTypeOf(new MIP(nLocalInterrupts)) + reg_mip.vssip := new_sip.vssip + reg_mip.vstip := new_sip.vstip + reg_mip.vseip := new_sip.vseip + } + when(decoded_addr(CSRs.hcounteren)) { reg_hcounteren := wdata } + when(decoded_addr(CSRs.htval)) { reg_htval := wdata } + when(decoded_addr(CSRs.mtval2)) { reg_mtval2 := wdata } + + when(decoded_addr(CSRs.vsstatus)) { + val new_vsstatus = wdata.asTypeOf(new MStatus()) + reg_vsstatus.sie := new_vsstatus.sie + reg_vsstatus.spie := new_vsstatus.spie + reg_vsstatus.spp := new_vsstatus.spp + reg_vsstatus.mxr := new_vsstatus.mxr + reg_vsstatus.sum := new_vsstatus.sum + reg_vsstatus.fs := formFS(new_vsstatus.fs) + } + when(decoded_addr(CSRs.vsip)) { + val new_vsip = ((read_hip & ~read_hideleg) | ((wdata << 1) & read_hideleg)).asTypeOf(new MIP(nLocalInterrupts)) + reg_mip.vssip := new_vsip.vssip + } + when(decoded_addr(CSRs.vsatp)) { + val new_vsatp = wdata.asTypeOf(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val mode_ok = isOneOf(new_vsatp.mode, satp_valid_modes.map(_.U)) + when(mode_ok) { + reg_vsatp.mode := new_vsatp.mode & satp_valid_modes.reduce(_ | _).U + } + when(mode_ok || !reg_mstatus.v) { + reg_vsatp.ppn := new_vsatp.ppn(vpnBits.min(new_vsatp.ppn.getWidth) - 1, 0) + if (asIdBits > 
0) reg_vsatp.asid := new_vsatp.asid(asIdBits - 1, 0) + } + } + when(decoded_addr(CSRs.vsie)) { reg_mie := (reg_mie & ~read_hideleg) | ((wdata << 1) & read_hideleg) } + when(decoded_addr(CSRs.vsscratch)) { reg_vsscratch := wdata } + when(decoded_addr(CSRs.vsepc)) { reg_vsepc := formEPC(wdata) } + when(decoded_addr(CSRs.vstvec)) { reg_vstvec := wdata } + when(decoded_addr(CSRs.vscause)) { reg_vscause := wdata & scause_mask } + when(decoded_addr(CSRs.vstval)) { reg_vstval := wdata } + when(decoded_addr(CSRs.henvcfg)) { write(reg_henvcfg, wdata) } + } + if (usingUser) { + when(decoded_addr(CSRs.mcounteren)) { reg_mcounteren := wdata } + when(decoded_addr(CSRs.menvcfg)) { write(reg_menvcfg, wdata) } + } + if (nBreakpoints > 0) { + when(decoded_addr(CSRs.tselect)) { reg_tselect := wdata } + + for ((bp, i) <- reg_bp.zipWithIndex) { + when(i.U === reg_tselect && (!bp.control.dmode || reg_debug)) { + when(decoded_addr(CSRs.tdata2)) { bp.address := wdata } + when(decoded_addr(CSRs.tdata3)) { + if (coreParams.mcontextWidth > 0) { + bp.textra.mselect := wdata(TExtra.mselectPos(xLen)) + bp.textra.mvalue := wdata >> TExtra.mvaluePos(xLen) + } + if (coreParams.scontextWidth > 0) { + bp.textra.sselect := wdata(TExtra.sselectPos) + bp.textra.svalue := wdata >> TExtra.svaluePos + } + } + when(decoded_addr(CSRs.tdata1)) { + bp.control := wdata.asTypeOf(bp.control) + + val prevChain = if (i == 0) false.B else reg_bp(i - 1).control.chain + val prevDMode = if (i == 0) false.B else reg_bp(i - 1).control.dmode + val nextChain = if (i >= nBreakpoints - 1) true.B else reg_bp(i + 1).control.chain + val nextDMode = if (i >= nBreakpoints - 1) true.B else reg_bp(i + 1).control.dmode + val newBPC = readModifyWriteCSR(io.rw.cmd, bp.control.asUInt, io.rw.wdata).asTypeOf(bp.control) + val dMode = newBPC.dmode && reg_debug && (prevDMode || !prevChain) + bp.control.dmode := dMode + when(dMode || (newBPC.action > 1.U)) { bp.control.action := newBPC.action }.otherwise { + bp.control.action := 0.U + } + bp.control.chain := newBPC.chain && !(prevChain || nextChain) && (dMode || !nextDMode) + } + } + } + } + reg_mcontext.foreach { r => when(decoded_addr(CSRs.mcontext)) { r := wdata } } + reg_scontext.foreach { r => when(decoded_addr(CSRs.scontext)) { r := wdata } } + if (reg_pmp.nonEmpty) for (((pmp, next), i) <- (reg_pmp.zip(reg_pmp.tail :+ reg_pmp.last)).zipWithIndex) { + require(xLen % pmp.cfg.getWidth == 0) + def cfgLocked(pmpReg: PMPReg) = pmpReg.cfg.l + def addrLocked(pmpReg: PMPReg, next: PMPReg) = pmpReg.cfg.l + + when(decoded_addr(CSRs.pmpcfg0 + pmpCfgIndex(i)) && !cfgLocked(pmp)) { + val newCfg = (wdata >> ((i * pmp.cfg.getWidth) % xLen)).asTypeOf(new PMPConfig()) + pmp.cfg := newCfg + // disallow unreadable but writable PMPs + pmp.cfg.w := newCfg.w && newCfg.r + // can't select a=NA4 with coarse-grained PMPs + if (log2Ceil(pmpGranularity) > PMP.lgAlign) + pmp.cfg.a := Cat(newCfg.a(1), newCfg.a.orR) + } + when(decoded_addr(CSRs.pmpaddr0 + i) && !addrLocked(pmp, next)) { + pmp.addr := wdata + } + } + def writeCustomCSR(io: CustomCSRIO, csr: CustomCSR, reg: UInt) = { + val mask = csr.mask.U(xLen.W) + when(decoded_addr(csr.id)) { + reg := (wdata & mask) | (reg & ~mask) + io.wen := true.B + } + } + for ((io, csr, reg) <- (io.customCSRs, customCSRs, reg_custom).zipped) { + writeCustomCSR(io, csr, reg) + } + + } + + // update csr for vector + if (usingVector) { + // connect csr for vector + val vtype = vector.get.states("vill") ## 0.U(23.W) ## vector.get.states("vma") ## + vector.get.states("vta") ## vector.get.states("vsew") 
## vector.get.states("vlmul") + val vcsr = vector.get.states("vxrm") ## vector.get.states("vxsat") + io.csrToVector.foreach {v => + v.vtype := vtype + v.vl := vector.get.states("vl") + v.vcsr := vcsr + v.vstart := vector.get.states("vstart") + } + // set vl type + val vsetvli = !io.inst(0)(31) + val vsetivli = io.inst(0)(31, 30).andR + val vsetvl = io.inst(0)(31) && !io.inst(0)(30) + val rs1IsZero = io.inst(0)(19, 15) === 0.U + val rdIsZero = io.inst(0)(11, 7) === 0.U + // v type set + val newVType = Mux1H( + Seq( + (vsetvli || vsetivli) -> io.inst(0)(27, 20), + vsetvl -> io.wbRegRS2.get(7, 0) + ) + ) + // vlmax = vlen * lmul / sew + val vlmax: UInt = (true.B << (log2Ceil(vLen) - 6) << (newVType(2, 0) + 3.U) >> newVType(5, 3)).asUInt + // set vl + val setVL = Mux1H( + Seq( + ((vsetvli || vsetvl) && !rs1IsZero) -> Mux(io.rw.wdata > vlmax, vlmax, io.rw.wdata), + ((vsetvli || vsetvl) && rs1IsZero && !rdIsZero) -> vlmax, + ((vsetvli || vsetvl) && rs1IsZero && rdIsZero) -> vector.get.states("vl"), + vsetivli -> io.inst(0)(19, 15) + ) + ) + setVlReadData := Mux(io.retire(0) && io.vectorCsr.getOrElse(false.B), setVL, 0.U) + when(io.retire(0) && io.vectorCsr.get) { + vector.get.states("vl") := setVL + vector.get.states("vlmul") := newVType(2, 0) + vector.get.states("vsew") := newVType(5, 3) + vector.get.states("vta") := newVType(6) + vector.get.states("vma") := newVType(7) + } + } else { + setVlReadData := 0.U + } + def setCustomCSR(io: CustomCSRIO, csr: CustomCSR, reg: UInt) = { + val mask = csr.mask.U(xLen.W) + when(io.set) { + reg := (io.sdata & mask) | (reg & ~mask) + } + } + for ((io, csr, reg) <- (io.customCSRs, customCSRs, reg_custom).zipped) { + setCustomCSR(io, csr, reg) + } + + when(io.reset.asBool) { + reg_satp.mode := 0.U + reg_vsatp.mode := 0.U + reg_hgatp.mode := 0.U + } + if (!usingVM) { + reg_satp.mode := 0.U + reg_satp.ppn := 0.U + reg_satp.asid := 0.U + } + if (!usingHypervisor) { + reg_vsatp.mode := 0.U + reg_vsatp.ppn := 0.U + reg_vsatp.asid := 0.U + reg_hgatp.mode := 0.U + reg_hgatp.ppn := 0.U + reg_hgatp.asid := 0.U + } + if (!(asIdBits > 0)) { + reg_satp.asid := 0.U + reg_vsatp.asid := 0.U + } + if (!(vmIdBits > 0)) { + reg_hgatp.asid := 0.U + } + reg_vsstatus.xs := 0.U + + if (nBreakpoints <= 1) reg_tselect := 0.U + for (bpc <- reg_bp.map { _.control }) { + def tType = 2 + def maskMax = 4 + // bpc.ttype := bpc.tType.U + bpc.ttype := tType.U + // bpc.maskmax := bpc.maskMax.U + bpc.maskmax := maskMax.U + bpc.reserved := 0.U + bpc.zero := 0.U + bpc.h := false.B + if (!usingSupervisor) bpc.s := false.B + if (!usingUser) bpc.u := false.B + if (!usingSupervisor && !usingUser) bpc.m := true.B + when(io.reset.asBool) { + bpc.action := 0.U + bpc.dmode := false.B + bpc.chain := false.B + bpc.r := false.B + bpc.w := false.B + bpc.x := false.B + } + } + for (bpx <- reg_bp.map { _.textra }) { + if (coreParams.mcontextWidth == 0) bpx.mselect := false.B + if (coreParams.scontextWidth == 0) bpx.sselect := false.B + } + for (bp <- reg_bp.drop(nBreakpoints)) + bp := 0.U.asTypeOf(new BP(xLen, useBPWatch, vaddrBits, coreParams.mcontextWidth, coreParams.scontextWidth)) + for (pmp <- reg_pmp) { + pmp.cfg.res := 0.U + def resetPMP(pmp: PMPReg): Unit = { + pmp.cfg.a := 0.U + pmp.cfg.l := 0.U + } + when(io.reset.asBool) { resetPMP(pmp) } + } + + def chooseInterrupt(masksIn: Seq[UInt]): (Bool, UInt) = { + val nonstandard = supported_interrupts.getWidth - 1 to 12 by -1 + // MEI, MSI, MTI, SEI, SSI, STI, VSEI, VSSI, VSTI, UEI, USI, UTI + val standard = Seq(11, 3, 7, 9, 1, 5, 10, 2, 6, 8, 0, 4) + val 
priority = nonstandard ++ standard + val masks = masksIn.reverse + val any = masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => m(i))).reduce(_ || _) + val which = PriorityMux(masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => (m(i), i.U)))) + (any, which) + } + + def readModifyWriteCSR(cmd: UInt, rdata: UInt, wdata: UInt) = { + (Mux(cmd(1), rdata, 0.U) | wdata) & ~Mux(cmd(1, 0).andR, wdata, 0.U) + } + + def legalizePrivilege(priv: UInt): UInt = + if (usingSupervisor) Mux(priv === PRV.H.U, PRV.U.U, priv) + else if (usingUser) Fill(2, priv(0)) + else PRV.M.U + + def trimPrivilege(priv: UInt): UInt = + if (usingSupervisor) priv + else legalizePrivilege(priv) + + def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = { + if (xLen == 32) { + val hi = lo + CSRs.mcycleh - CSRs.mcycle + when(decoded_addr(lo)) { ctr.assign(Cat(ctr.value(ctr.width - 1, 32), wdata)) } + when(decoded_addr(hi)) { ctr.assign(Cat(wdata(ctr.width - 33, 0), ctr.value(31, 0))) } + } else { + when(decoded_addr(lo)) { ctr.assign(wdata(ctr.width - 1, 0)) } + } + } + def andNot(x: UInt, y: UInt): UInt = x & ~(y | (x & 0.U)) + def formEPC(x: UInt): UInt = ~(~x | (if (usingCompressed) 1.U else 3.U)) + def readEPC(x: UInt): UInt = ~(~x | Mux(reg_misa('c' - 'a'), 1.U, 3.U)) + def formTVec(x: UInt): UInt = + andNot(x, Mux(x(0), ((((BigInt(1) << mtvecInterruptAlign) - 1) << mtvecBaseAlign) | 2).U, 2.U)) + def isaStringToMask(s: String): Int = s.map(x => 1 << (x - 'A')).foldLeft(0)(_ | _) + def formFS(fs: UInt): UInt = if (coreParams.haveFSDirty) fs else Fill(2, fs.orR) +} diff --git a/rocketv/src/DecodeLogic.scala b/rocketv/src/DecodeLogic.scala new file mode 100644 index 000000000..ad835f56d --- /dev/null +++ b/rocketv/src/DecodeLogic.scala @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util.BitPat +import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable} + +// compatibility layer. +object DecodeLogic +{ + // TODO This should be a method on BitPat + private def hasDontCare(bp: BitPat): Boolean = bp.mask.bitCount != bp.width + // Pads BitPats that are safe to pad (no don't cares), errors otherwise + private def padBP(bp: BitPat, width: Int): BitPat = { + if (bp.width == width) bp + else { + require(!hasDontCare(bp), s"Cannot pad '$bp' to '$width' bits because it has don't cares") + val diff = width - bp.width + require(diff > 0, s"Cannot pad '$bp' to '$width' because it is already '${bp.width}' bits wide!") + BitPat(0.U(diff.W)) ## bp + } + } + + def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = + chisel3.util.experimental.decode.decoder(QMCMinimizer, addr, TruthTable(mapping, default)) + def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = { + val nElts = default.size + require(mappingIn.forall(_._2.size == nElts), + s"All Seq[BitPat] must be of the same length, got $nElts vs. 
${mappingIn.find(_._2.size != nElts).get}"
+    )
+
+    val elementsGrouped = mappingIn.map(_._2).transpose
+    val elementWidths = elementsGrouped.zip(default).map { case (elts, default) =>
+      (default :: elts.toList).map(_.getWidth).max
+    }
+    val resultWidth = elementWidths.sum
+
+    val elementIndices = elementWidths.scan(resultWidth - 1) { case (l, r) => l - r }
+
+    // All BitPats that correspond to a given element in the result must have the same width in the
+    // chisel3 decoder. We will zero-pad any BitPats that are too small, so long as they don't have
+    // any don't cares. If there are don't cares, it is an error and the user needs to pad the
+    // BitPat themselves.
+    val defaultsPadded = default.zip(elementWidths).map { case (bp, w) => padBP(bp, w) }
+    val mappingInPadded = mappingIn.map { case (in, elts) =>
+      in -> elts.zip(elementWidths).map { case (bp, w) => padBP(bp, w) }
+    }
+    val decoded = apply(addr, defaultsPadded.reduce(_ ## _), mappingInPadded.map { case (in, out) => (in, out.reduce(_ ## _)) })
+
+    elementIndices.zip(elementIndices.tail).map { case (msb, lsb) => decoded(msb, lsb + 1) }.toList
+  }
+  def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] =
+    apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]])
+  def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool =
+    apply(addr, BitPat.dontCare(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).asBool
+}
diff --git a/rocketv/src/Decoder.scala b/rocketv/src/Decoder.scala
new file mode 100644
index 000000000..419e407b0
--- /dev/null
+++ b/rocketv/src/Decoder.scala
@@ -0,0 +1,749 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu
+package org.chipsalliance.rocketv
+
+import chisel3._
+import chisel3.experimental.hierarchy.instantiable
+import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
+import chisel3.util.BitPat
+import chisel3.util.experimental.decode.{BoolDecodeField, DecodeField, DecodePattern, DecodeTable}
+import org.chipsalliance.rvdecoderdb.{Encoding, Instruction, InstructionSet}
+
+// behave like ChiselEnum, but for compatibility, use UInt for now.
+// This is going to be upstreamed to Chisel in the future.
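+//
+// A minimal usage sketch (illustrative only; UOPExample is not part of this
+// patch): a two-state micro-op space would look like
+//
+//   object UOPExample extends UOP {
+//     def width = 1
+//     def no:  BitPat = encode(0)
+//     def yes: BitPat = encode(1)
+//   }
+//
+// A UOPDecodeField would then set uopType to UOPExample and return
+// UOPExample.yes / UOPExample.no (or UOPExample.dontCare) from genTable.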
+trait UOP {
+  def width: Int
+
+  def dontCare: BitPat = BitPat.dontCare(width)
+
+  def chiselType: TPE = UInt(width.W)
+
+  def encode(lit: Int): BitPat = BitPat(lit.U(width.W))
+
+  def encode(strLit: String): BitPat = BitPat(strLit.U(width.W))
+
+  type TPE = UInt
+}
+
+trait UOPDecodeField[T <: DecodePattern] extends DecodeField[T, UInt] {
+  def uopType: UOP
+
+  def chiselType: UInt = uopType.chiselType
+}
+
+object CustomInstructions {
+  private def rocket(name: String, encoding: Encoding) =
+    Instruction(name, encoding, Seq(), Seq(InstructionSet("rv_rocket")), None, false, true)
+
+  val rocketSet = Seq(
+    // should be replaced by:
+    // cbo.clean rs1 31..20=1 14..12=2 11..7=0 6..2=0x03 1..0=3
+    // cbo.flush rs1 31..20=2 14..12=2 11..7=0 6..2=0x03 1..0=3
+    // cbo.inval rs1 31..20=0 14..12=2 11..7=0 6..2=0x03 1..0=3
+    rocket("c.flush.d.l1", Encoding.fromString("111111000000?????000000001110011")),
+    rocket("c.discard.d.l1", Encoding.fromString("111111000010?????000000001110011")),
+    // no standard instruction exists for this; maybe we need to change it to an MMIO store to the PMU
+    rocket("cease", Encoding.fromString("00110000010100000000000001110011"))
+  )
+}
+
+object DecoderParameter {
+  implicit def rwP: upickle.default.ReadWriter[DecoderParameter] = upickle.default.macroRW[DecoderParameter]
+}
+
+case class DecoderParameter(
+  instructionSets: Set[String],
+  pipelinedMul: Boolean,
+  fenceIFlushDCache: Boolean)
+    extends SerializableModuleParameter {
+  val instructions: Seq[Instruction] =
+    org.chipsalliance.rvdecoderdb
+      .instructions(
+        org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)
+      )
+      .filter(instruction =>
+        (
+          instructionSets ++
+            // Four mandatory instruction sets.
+            Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system")
+        ).contains(instruction.instructionSet.name)
+      )
+      .toSeq
+      .filter {
+        // special case for rv32 pseudo instructions derived from rv64
+        case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true
+        case i if i.pseudoFrom.isDefined => false
+        case _ => true
+      }
+      .sortBy(i => (i.instructionSet.name, i.name))
+
+  // The functions below are a little reminder for the future rocket core refactoring; just keep them for now, I'll remove them later.
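+  // For illustration (hypothetical parameter value, not a shipped config):
+  // instructionSets = Set("rv_i", "rv_m", "rv_f") would make the predicates
+  // below report useMulDiv = true and fLen32 = true (hence useFPU = true),
+  // while useVector stays false.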
+ private def hasAnySetIn(sets: String*): Boolean = + sets.exists(set => instructions.flatMap(_.instructionSets.map(_.name)).exists(_.contains(set))) + + private def xLen32: Boolean = instructions.map(_.instructionSet.name).exists(_.startsWith("rv32_")) + + private def xLen64: Boolean = instructions.map(_.instructionSet.name).exists(_.startsWith("rv64_")) + + private def fLen0: Boolean = !fLen32 && !fLen64 + + private def fLen32: Boolean = hasAnySetIn("rv_f", "rv32_f", "rv64_f") + + private def fLen64: Boolean = hasAnySetIn("rv_d", "rv32_d", "rv64_d") + + private val useFPU = !fLen0 + private val useMulDiv = hasAnySetIn("rv_m", "rv64_m") + private val useVector = hasAnySetIn("rv_v") + + private val instructionDecodePatterns: Seq[RocketDecodePattern] = instructions.map(RocketDecodePattern.apply) + private val instructionDecodeFields: Seq[DecodeField[RocketDecodePattern, _ <: Data]] = Seq( + isLegal, + isBranch, + isJal, + isJalr, + rxs2, + rxs1, + selAlu2, + selAlu1, + selImm, + aluDoubleWords, + mem, + memCommand, + wxd, + csr, + fenceI, + fence, + amo, + aluFn + ) ++ + (if (useFPU) Seq(fp, rfs1, rfs2, rfs3, wfd, dp) else None) ++ + (if (useMulDiv) if (pipelinedMul) Seq(mul, div) else Seq(div) else None) ++ + (if (useVector) Seq(vector, vectorLSU, vectorCSR) else None) + private val Y = BitPat.Y() + private val N = BitPat.N() + + val table: DecodeTable[RocketDecodePattern] = new DecodeTable[RocketDecodePattern]( + instructionDecodePatterns, + instructionDecodeFields + ) + + object isLegal extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "legal" + + override def default: BitPat = n + + // should always be true + override def genTable(op: RocketDecodePattern): BitPat = y + } + + object fp extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "fp" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.instructionSet.name match { + // format: off + case s if Seq( + "rv_d", "rv64_d", + "rv_f", "rv64_f", + "rv_q", "rv64_q", + "rv_zfh", "rv64_zfh", "rv_d_zfh", "rv_q_zfh", + ).contains(s) => y + case _ => n + // format: on + } + } + + object dp extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "dp" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.instructionSet.name match { + // format: off + case s if Seq("rv_d", "rv_d_zfh", "rv64_d").contains(s) => y + case _ => n + // format: on + } + } + + object isBranch extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "branch" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("bne", "beq", "blt", "bltu", "bge", "bgeu").contains(i) => y + case _ => n + // format: on + } + } + + object isJal extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "jal" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("jal").contains(i) => y + case _ => n + // format: on + } + } + + object isJalr extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "jalr" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("jalr").contains(i) => y + case _ => n + // format: on + } + } + + object rxs2 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rxs2" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: 
off + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "hsv.w", "hsv.b", "hfence.vvma", "hsv.h", "hfence.gvma", "hsv.d", "or", "srl", "sltu", "sra", "sb", "add", "xor", "beq", "bge", "sw", "blt", "bgeu", "bltu", "bne", "sub", "and", "slt", "sh", "sll", "addw", "sd", "sllw", "sraw", "subw", "srlw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "sfence.vma", "czero.nez", "czero.eqz").contains(i) => y + case (_, p) if p.vectorReadRs2 => y + case _ => n + // format: on + } + } + + object rxs1 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rxs1" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fcvt.d.wu", "fsd", "fcvt.d.w", "fcvt.d.lu", "fmv.d.x", "fcvt.d.l", "fcvt.s.wu", "fmv.w.x", "fsw", "fcvt.s.w", "flw", "fcvt.s.lu", "fcvt.s.l", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "bne", "lbu", "sub", "and", "xori", "slti", "slt", "addi", "lb", "sh", "sll", "srli", "srai", "slli", "ld", "addw", "sd", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "sfence.vma", "fsh", "flh", "fcvt.h.wu", "fcvt.h.w", "fmv.h.x", "fcvt.h.lu", "fcvt.h.l", "csrrc", "csrrs", "csrrw", "czero.nez", "czero.eqz", "cflush.d.l1", "cdiscard.d.l1").contains(i) => y + case (i, _) if Seq("ecall", "ebreak", "mret", "wfi", "sret", "dret", "cease", "nmret").contains(i) => dc + case (_, p) if p.vectorReadRs1 => y + case _ => n + // format: on + } + } + + object fenceI extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "fence_i" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fence.i").contains(i) => y + case _ => n + // format: on + } + } + + object fence extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "fence" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fence").contains(i) => y + case _ => n + // format: on + } + } + + object amo extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "amo" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.instructionSet.name match { + // format: off + case s if Seq("rv_a", "rv64_a").contains(s) => y + case _ => n + // format: on + } + } + + object aluDoubleWords extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "alu_dw" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", 
"amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fsd", "fsw", "flw", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "lui", "bne", "lbu", "sub", "and", "auipc", "xori", "slti", "slt", "addi", "lb", "jal", "sh", "sll", "srli", "srai", "slli", "ld", "sd", "lwu", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "sfence.vma", "fsh", "flh", "csrrc", "csrrci", "csrrs", "csrrw", "csrrsi", "csrrwi", "czero.nez", "czero.eqz").contains(i) => y + case i if Seq("addw", "sraiw", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "remuw", "divw", "divuw", "mulw", "remw").contains(i) => n + case _ => dc + // format: on + } + } + } + + object mem extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "mem" + + override def default: BitPat = n + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fsd", "fsw", "flw", "hsv.w", "hsv.b", "hlv.hu", "hlv.b", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hsv.d", "hlv.d", "hlv.wu", "lhu", "sb", "lw", "sw", "lh", "lbu", "lb", "sh", "ld", "sd", "lwu", "sfence.vma", "fsh", "flh").contains(i) => y + case i if Seq("fence.i").contains(i) && fenceIFlushDCache => y + case _ => n + // format: on + } + } + } + + object rfs1 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rfs1" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("fmin.d", "fsgnj.d", "fle.d", "fnmsub.d", "fadd.d", "fcvt.w.d", "fmsub.d", "fmul.d", "fcvt.wu.d", "feq.d", "fmax.d", "fnmadd.d", "fcvt.d.s", "fcvt.s.d", "fmadd.d", "fsgnjx.d", "flt.d", "fsgnjn.d", "fsub.d", "fsqrt.d", "fclass.d", "fdiv.d", "fmv.x.d", "fcvt.lu.d", "fcvt.l.d", "fcvt.d.h", "fcvt.h.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fclass.s", "fcvt.wu.s", "fmax.s", "feq.s", "fle.s", "fmadd.s", "fsgnj.s", "fadd.s", "flt.s", "fmv.x.w", "fnmadd.s", "fmul.s", "fcvt.w.s", "fsub.s", "fcvt.lu.s", "fcvt.l.s", "feq.h", "fsgnjx.h", "fcvt.w.h", "fcvt.h.s", "fdiv.h", "fclass.h", "fsgnj.h", "fmul.h", "fsub.h", "fcvt.wu.h", "fadd.h", "fmax.h", "fsgnjn.h", "fmv.x.h", "fcvt.s.h", "fmsub.h", "fmin.h", "fsqrt.h", "flt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fle.h", "fcvt.l.h", "fcvt.lu.h").contains(i) => y + case _ => n + // format: on + } + } + } + + object rfs2 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rfs2" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("fmin.d", "fsgnj.d", "fle.d", "fnmsub.d", "fadd.d", "fmsub.d", "fmul.d", "feq.d", "fmax.d", "fnmadd.d", "fmadd.d", "fsgnjx.d", "flt.d", "fsgnjn.d", "fsub.d", "fsqrt.d", "fdiv.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fmax.s", "feq.s", "fle.s", 
"fmadd.s", "fsgnj.s", "fadd.s", "flt.s", "fnmadd.s", "fmul.s", "fsub.s", "feq.h", "fsgnjx.h", "fdiv.h", "fsgnj.h", "fmul.h", "fsub.h", "fadd.h", "fmax.h", "fsgnjn.h", "fmsub.h", "fmin.h", "fsqrt.h", "flt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fle.h").contains(i) => y + case _ => n + // format: on + } + } + } + + object rfs3 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rfs3" + + override def genTable(op: RocketDecodePattern): BitPat = + op.instruction.name match { + // format: off + case i if Seq("fnmsub.d", "fmsub.d", "fnmadd.d", "fmadd.d", "fnmsub.s", "fmsub.s", "fmadd.s", "fnmadd.s", "fmsub.h", "fnmadd.h", "fmadd.h", "fnmsub.h").contains(i) => y + case _ => n + // format: on + } + } + + object wfd extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "wfd" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fmin.d", "fsgnj.d", "fnmsub.d", "fadd.d", "fmsub.d", "fld", "fmul.d", "fmax.d", "fcvt.d.wu", "fnmadd.d", "fcvt.d.s", "fcvt.s.d", "fmadd.d", "fsgnjx.d", "fsgnjn.d", "fsub.d", "fsqrt.d", "fcvt.d.w", "fdiv.d", "fcvt.d.lu", "fmv.d.x", "fcvt.d.l", "fcvt.d.h", "fcvt.h.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fmax.s", "fcvt.s.wu", "fmv.w.x", "fmadd.s", "fsgnj.s", "fadd.s", "fnmadd.s", "fcvt.s.w", "flw", "fmul.s", "fsub.s", "fcvt.s.lu", "fcvt.s.l", "fsgnjx.h", "fcvt.h.s", "fdiv.h", "fsgnj.h", "fmul.h", "fsub.h", "flh", "fadd.h", "fmax.h", "fsgnjn.h", "fcvt.s.h", "fcvt.h.wu", "fcvt.h.w", "fmsub.h", "fmin.h", "fsqrt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fmv.h.x", "fcvt.h.lu", "fcvt.h.l").contains(i) => y + case _ => n + // format: on + } + } + + object mul extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "mul" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("mulhsu", "mul", "mulhu", "mulh", "mulw").contains(i) => y + case _ => n + // format: on + } + } + + object div extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "div" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("mulhsu", "mul", "mulhu", "mulh", "mulw").contains(i) && !pipelinedMul => y + case i if Seq("rem", "div", "remu", "divu", "remuw", "divw", "divuw", "remw").contains(i) => y + case _ => n + // format: on + } + } + + object wxd extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "wxd" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + // TODO: filter out rd + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fle.d", "fcvt.w.d", "fcvt.wu.d", "feq.d", "flt.d", "fclass.d", "fmv.x.d", "fcvt.lu.d", "fcvt.l.d", "fclass.s", "fcvt.wu.s", "feq.s", "fle.s", "flt.s", "fmv.x.w", "fcvt.w.s", "fcvt.lu.s", "fcvt.l.s", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hlv.h", "hlv.bu", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "lw", "add", "xor", "andi", "sltiu", "lh", "jalr", "lui", "lbu", "sub", "and", "auipc", "xori", "slti", "slt", "addi", "lb", "jal", "sll", "srli", "srai", "slli", "ld", "addw", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", 
"slliw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "feq.h", "fcvt.w.h", "fclass.h", "fcvt.wu.h", "fmv.x.h", "flt.h", "fle.h", "fcvt.l.h", "fcvt.lu.h", "csrrc", "csrrci", "csrrs", "csrrw", "csrrsi", "csrrwi", "czero.nez", "czero.eqz").contains(i) => y + case i if Seq("vsetvl", "vsetivli", "vsetvli", "vmv.x.s", "vcpop.m", "vfirst.m").contains(i) => y + case _ => n + // format: on + } + } + + // UOPs + + object UOPMEM extends UOP { + def width = 5 + + def xrd: BitPat = encode("b00000") + + def xwr: BitPat = encode("b00001") + + def pfr: BitPat = encode("b00010") + + def pfw: BitPat = encode("b00011") + + def xaSwap: BitPat = encode("b00100") + + def flushAll: BitPat = encode("b00101") + + def xlr: BitPat = encode("b00110") + + def xsc: BitPat = encode("b00111") + + def xaAdd: BitPat = encode("b01000") + + def xaXor: BitPat = encode("b01001") + + def xaOr: BitPat = encode("b01010") + + def xaAnd: BitPat = encode("b01011") + + def xaMin: BitPat = encode("b01100") + + def xaMax: BitPat = encode("b01101") + + def xaMinu: BitPat = encode("b01110") + + def xaMaxu: BitPat = encode("b01111") + + // TODO: unused + def flush: BitPat = encode("b10000") + + // TODO: unused + def pwr: BitPat = encode("b10001") + + // TODO: unused + def produce: BitPat = encode("b10010") + + // TODO: unused + def clean: BitPat = encode("b10011") + + def sfence: BitPat = encode("b10100") + + def hfencev: BitPat = encode("b10101") + + def hfenceg: BitPat = encode("b10110") + + def wok: BitPat = encode("b10111") + + def hlvx: BitPat = encode("b10000") + } + + object memCommand extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "mem_cmd" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("fld", "flh", "flw", "hlv.b", "hlv.bu", "hlv.d", "hlv.h", "hlv.hu", "hlv.w", "hlv.wu", "lb", "lbu", "ld", "lh", "lhu", "lw", "lwu").contains(i) => UOPMEM.xrd + case i if Seq("fsd", "fsh", "fsw", "hsv.b", "hsv.d", "hsv.h", "hsv.w", "sb", "sd", "sh", "sw").contains(i) => UOPMEM.xwr + case i if Seq("amoswap.d", "amoswap.w").contains(i) => UOPMEM.xaSwap + case i if Seq("fence.i").contains(i) && fenceIFlushDCache => UOPMEM.flushAll + case i if Seq("lr.d", "lr.w").contains(i) => UOPMEM.xlr + case i if Seq("sc.d", "sc.w").contains(i) => UOPMEM.xsc + case i if Seq("amoadd.d", "amoadd.w").contains(i) => UOPMEM.xaAdd + case i if Seq("amoxor.d", "amoxor.w").contains(i) => UOPMEM.xaXor + case i if Seq("amoor.d", "amoor.w").contains(i) => UOPMEM.xaOr + case i if Seq("amoand.d", "amoand.w").contains(i) => UOPMEM.xaAnd + case i if Seq("amomin.d", "amomin.w").contains(i) => UOPMEM.xaMin + case i if Seq("amomax.d", "amomax.w").contains(i) => UOPMEM.xaMax + case i if Seq("amominu.d", "amominu.w").contains(i) => UOPMEM.xaMinu + case i if Seq("amomaxu.d", "amomaxu.w").contains(i) => UOPMEM.xaMaxu + case i if Seq("sfence.vma").contains(i) => UOPMEM.sfence + case i if Seq("hfence.vvma").contains(i) => UOPMEM.hfencev + case i if Seq("hfence.gvma").contains(i) => UOPMEM.hfenceg + case i if Seq("hlvx.hu", "hlvx.wu").contains(i) => UOPMEM.hlvx + case _ => UOPMEM.dontCare + // format: on + } + } + + override def uopType: UOPMEM.type = UOPMEM + } + + object UOPCSR extends UOP { + def width = 3 + + def n: BitPat = encode(0) + + def r: BitPat = encode(2) + + def i: BitPat = encode(4) + + def w: BitPat = encode(5) + + def s: BitPat = encode(6) + + def c: BitPat = encode(7) + } + + object csr extends 
UOPDecodeField[RocketDecodePattern] { + override def name: String = "csr" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + // TODO: default should be N? + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fmin.d", "fsgnj.d", "fle.d", "fnmsub.d", "fadd.d", "fcvt.w.d", "fmsub.d", "fld", "fmul.d", "fcvt.wu.d", "feq.d", "fmax.d", "fcvt.d.wu", "fnmadd.d", "fcvt.d.s", "fcvt.s.d", "fsd", "fmadd.d", "fsgnjx.d", "flt.d", "fsgnjn.d", "fsub.d", "fsqrt.d", "fclass.d", "fcvt.d.w", "fdiv.d", "fcvt.d.lu", "fmv.x.d", "fmv.d.x", "fcvt.lu.d", "fcvt.l.d", "fcvt.d.l", "fcvt.d.h", "fcvt.h.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fclass.s", "fcvt.wu.s", "fmax.s", "feq.s", "fcvt.s.wu", "fmv.w.x", "fle.s", "fmadd.s", "fsgnj.s", "fadd.s", "fsw", "flt.s", "fmv.x.w", "fnmadd.s", "fcvt.s.w", "flw", "fmul.s", "fcvt.w.s", "fsub.s", "fcvt.lu.s", "fcvt.s.lu", "fcvt.l.s", "fcvt.s.l", "or", "srl", "fence", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "lui", "bne", "lbu", "sub", "and", "auipc", "xori", "slti", "slt", "addi", "lb", "jal", "sh", "sll", "srli", "srai", "slli", "ld", "addw", "sd", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "feq.h", "fsgnjx.h", "fcvt.w.h", "fcvt.h.s", "fdiv.h", "fclass.h", "fsh", "fsgnj.h", "fmul.h", "fsub.h", "flh", "fcvt.wu.h", "fadd.h", "fmax.h", "fsgnjn.h", "fmv.x.h", "fcvt.s.h", "fcvt.h.wu", "fcvt.h.w", "fmsub.h", "fmin.h", "fsqrt.h", "flt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fmv.h.x", "fle.h", "fcvt.l.h", "fcvt.lu.h", "fcvt.h.lu", "fcvt.h.l", "fence.i", "czero.nez", "czero.eqz").contains(i) => UOPCSR.n + case i if Seq("cdiscard.d.l1", "cease", "cflush.d.l1", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "ebreak", "ecall", "sret", "sfence.vma", "dret", "wfi", "mret", "mnret").contains(i) => UOPCSR.i + case i if Seq("csrrw", "csrrwi").contains(i) => UOPCSR.w + case i if Seq("csrrs", "csrrsi").contains(i) => UOPCSR.s + case i if Seq("csrrc", "csrrci").contains(i) => UOPCSR.c + case _ => UOPCSR.dontCare + // format: on + } + + override def uopType: UOPCSR.type = UOPCSR + } + + object UOPALU extends UOP { + def width = 4 + + def add: BitPat = encode(0) + + def sl: BitPat = encode(1) + + def seq: BitPat = encode(2) + + def sne: BitPat = encode(3) + + def xor: BitPat = encode(4) + + def sr: BitPat = encode(5) + + def or: BitPat = encode(6) + + def and: BitPat = encode(7) + + def czeqz: BitPat = encode(8) + + def cznez: BitPat = encode(9) + + def sub: BitPat = encode(10) + + def sra: BitPat = encode(11) + + def slt: BitPat = encode(12) + + def sge: BitPat = encode(13) + + def sltu: BitPat = encode(14) + + def sgeu: BitPat = encode(15) + + def div: BitPat = xor + + def divu: BitPat = sr + + def rem: BitPat = or + + def remu: BitPat = and + + def mul: BitPat = add + + def mulh: BitPat = sl + + def mulhsu: BitPat = seq + + def mulhu: BitPat = sne + } + + object aluFn extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "alu_fn" + + 
override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fsd", "fsw", "flw", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "lhu", "sb", "lw", "add", "sw", "lh", "jalr", "lui", "lbu", "auipc", "addi", "lb", "jal", "sh", "ld", "addw", "sd", "lwu", "addiw", "sfence.vma", "fsh", "flh", "csrrc", "csrrci", "csrrs", "csrrw", "csrrsi", "csrrwi", "cdiscard.d.l1", "cflush.d.l1").contains(i) => UOPALU.add + case (i, _) if Seq("and", "andi").contains(i) => UOPALU.and + case (i, _) if Seq("or", "ori").contains(i) => UOPALU.or + case (i, _) if Seq("beq").contains(i) => UOPALU.seq + case (i, _) if Seq("bge").contains(i) => UOPALU.sge + case (i, _) if Seq("bgeu").contains(i) => UOPALU.sgeu + case (i, _) if Seq("sll", "slli", "slli", "slliw", "sllw").contains(i) => UOPALU.sl + case (i, _) if Seq("blt", "slt", "slti").contains(i) => UOPALU.slt + case (i, _) if Seq("bltu", "sltiu", "sltu").contains(i) => UOPALU.sltu + case (i, _) if Seq("bne").contains(i) => UOPALU.sne + case (i, _) if Seq("srl", "srli", "srli", "srliw", "srlw").contains(i) => UOPALU.sr + case (i, _) if Seq("sra", "srai", "srai", "sraiw", "sraw").contains(i) => UOPALU.sra + case (i, _) if Seq("sub", "subw").contains(i) => UOPALU.sub + case (i, _) if Seq("xor", "xori").contains(i) => UOPALU.xor + + // rv_m + case (i, _) if Seq("mul", "mulw").contains(i) => UOPALU.mul + case (i, _) if Seq("mulh").contains(i) => UOPALU.mulh + case (i, _) if Seq("mulhu").contains(i) => UOPALU.mulhu + case (i, _) if Seq("mulhsu").contains(i) => UOPALU.mulhsu + case (i, _) if Seq("div", "divw").contains(i) => UOPALU.div + case (i, _) if Seq("divu", "divuw").contains(i) => UOPALU.divu + case (i, _) if Seq("rem", "remw").contains(i) => UOPALU.rem + case (i, _) if Seq("remu", "remuw").contains(i) => UOPALU.remu + + case (i, _) if Seq("czero.eqz").contains(i) => UOPALU.czeqz + case (i, _) if Seq("czero.nez").contains(i) => UOPALU.cznez + // vector + // 7. Vector read RS1 go through ALU rs1 + 0. 
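+      // e.g. (illustration) for vadd.vx vd, vs2, rs1 the scalar datapath
+      // computes rs1 + 0 (selAlu1 = rs1, selAlu2 = zero below), handing the
+      // unmodified scalar operand to the vector unit.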
+ case (_, p) if p.vectorReadRs1 => UOPALU.add + case _ => UOPALU.dontCare + // format: on + } + + override def uopType: UOPALU.type = UOPALU + } + + object UOPIMM extends UOP { + def width = 3 + + def s: BitPat = encode(0) + + def sb: BitPat = encode(1) + + def u: BitPat = encode(2) + + def uj: BitPat = encode(3) + + def i: BitPat = encode(4) + + def z: BitPat = encode(5) + } + + object selImm extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "sel_imm" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fld", "flw", "hsv.w", "hsv.b", "hsv.h", "hsv.d", "ori", "lhu", "lw", "andi", "sltiu", "lh", "jalr", "lbu", "xori", "slti", "addi", "lb", "srli", "srai", "slli", "ld", "sraiw", "lwu", "addiw", "srliw", "slliw", "flh").contains(i) => UOPIMM.i + case i if Seq("fsd", "fsh", "fsw", "sb", "sd", "sh", "sw").contains(i) => UOPIMM.s + case i if Seq("beq", "bge", "bgeu", "blt", "bltu", "bne").contains(i) => UOPIMM.sb + case i if Seq("auipc", "lui").contains(i) => UOPIMM.u + case i if Seq("jal").contains(i) => UOPIMM.uj + case i if Seq("csrrci", "csrrsi", "csrrwi").contains(i) => UOPIMM.z + case _ => UOPIMM.dontCare + // format: on + } + + override def uopType: UOPIMM.type = UOPIMM + } + + object UOPA1 extends UOP { + def width = 2 + + def zero: BitPat = encode(0) + + def rs1: BitPat = encode(1) + + def pc: BitPat = encode(2) + } + + object selAlu1 extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "sel_alu1" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("auipc", "jal").contains(i) => UOPA1.pc + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fcvt.d.wu", "fsd", "fcvt.d.w", "fcvt.d.lu", "fmv.d.x", "fcvt.d.l", "fcvt.s.wu", "fmv.w.x", "fsw", "fcvt.s.w", "flw", "fcvt.s.lu", "fcvt.s.l", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "bne", "lbu", "sub", "and", "xori", "slti", "slt", "addi", "lb", "sh", "sll", "srli", "srai", "slli", "ld", "addw", "sd", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "sfence.vma", "fsh", "flh", "fcvt.h.wu", "fcvt.h.w", "fmv.h.x", "fcvt.h.lu", "fcvt.h.l", "csrrc", "csrrs", "csrrw", "czero.nez", "czero.eqz", "cdiscard.d.l1", "cflush.d.l1").contains(i) => UOPA1.rs1 + case (_, p) if p.vectorReadRs1 => UOPA1.rs1 + case (i, _) if Seq("csrrci", "csrrsi", "csrrwi", "lui").contains(i) => UOPA1.zero + case _ => UOPA1.dontCare + } + + override def uopType: UOPA1.type = UOPA1 + } + + object UOPA2 extends UOP { + def width = 2 + + def zero: BitPat = encode(0) + + def size: BitPat = encode(1) + + def rs2: BitPat = encode(2) + + def imm: BitPat = encode(3) + } + + object selAlu2 extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "sel_alu2" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case 
(i, _) if Seq("fld", "fsd", "fsw", "flw", "ori", "lhu", "sb", "lw", "andi", "sw", "sltiu", "lh", "jalr", "lui", "lbu", "auipc", "xori", "slti", "addi", "lb", "sh", "srli", "srai", "slli", "ld", "sd", "sraiw", "lwu", "addiw", "srliw", "slliw", "fsh", "flh", "csrrci", "csrrsi", "csrrwi").contains(i) => UOPA2.imm + case (i, _) if Seq("or", "srl", "sltu", "sra", "add", "xor", "beq", "bge", "blt", "bgeu", "bltu", "bne", "sub", "and", "slt", "sll", "addw", "sllw", "sraw", "subw", "srlw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "czero.nez", "czero.eqz").contains(i) => UOPA2.rs2 + case (i, _) if Seq("jal").contains(i) => UOPA2.size + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "sfence.vma", "csrrc", "csrrs", "csrrw", "cdiscard.d.l1", "cflush.d.l1").contains(i) => UOPA2.zero + case (_, p) if p.vectorReadRs1 => UOPA2.zero + case _ => UOPA2.dontCare + } + + override def uopType: UOPA2.type = UOPA2 + } + + object vector extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "vector" + + override def genTable(op: RocketDecodePattern): BitPat = if (op.instruction.instructionSet.name == "rv_v") Y else N + } + + object vectorLSU extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "vectorLSU" + + override def genTable(op: RocketDecodePattern): BitPat = if (op.isVectorLSU) Y else N + } + + object vectorCSR extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "vectorCSR" + + override def genTable(op: RocketDecodePattern): BitPat = if (op.isVectorCSR) Y else N + } +} + +class DecoderInterface(parameter: DecoderParameter) extends Bundle { + val instruction = Input(UInt(32.W)) + val output = Output(parameter.table.bundle) +} + +/** DecodePattern for an RISC-V instruction */ +case class RocketDecodePattern(instruction: Instruction) extends DecodePattern { + override def bitPat: BitPat = BitPat("b" + instruction.encoding.toString) + def isVector = instruction.instructionSet.name == "rv_v" + def isVectorCSR = Seq("vsetvl", "vsetivli", "vsetvli").contains(instruction.name) + def isVectorLSU = instruction.name match { + // unit stride + // load/store(t) sz element + case s"v${t}e${sz}.v" if (t == "l") || (t == "s") => true + // alias to vl(s)e1.v + case s"v${t}m.v" if (t == "l") || (t == "s") => true + // load/store(t) element w/ first fault + case s"v${t}e${sz}ff.v" if (t == "l") || (t == "s") => true + // load/store(t) r registers with VLEN/sz bytes + case s"v${tr}re${sz}.v" if tr.startsWith("l") || tr.startsWith("s") => true + // alias to vl(s)szr.v + case s"v${tsz}r.v" if tsz.startsWith("l") || tsz.startsWith("s") => true + // stride + case s"v${t}se${sz}.v" if (t == "l") || (t == "s") => true + // indexed + case s"v${to}xei${sz}.v" if (to == "lo" || to == "lu" || to == "so" || to == "su") => true + case _ => false + } + // todo: unsure. 
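+  // Examples the heuristic below accepts (illustration): "vle32.v" via
+  // isVectorLSU, "vadd.vx" via s"v${op}.vx", "vmv.v.x" via s"v${op}.v.x",
+  // plus "vsetvl"/"vsetvli". Note that "vsetivli" does not match
+  // s"vsetvl${i}"; that is consistent with its AVL coming from an
+  // immediate rather than rs1.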
+ def vectorReadRs1: Boolean = isVectorLSU || (instruction.name match { + // vx type + case s"v${op}.vx" => true + case s"v${op}.v.x" => true + // set vl + case s"vsetvl${i}" => true + case _ => false + }) + def vectorReadRs2 = instruction.name match { + // set vl + case s"vsetvl" => true + // stride + case s"v${t}se${sz}.v" if (t == "l") || (t == "s") => true + case _ => false + } +} + +@instantiable +class Decoder(val parameter: DecoderParameter) + extends FixedIORawModule(new DecoderInterface(parameter)) + with SerializableModule[DecoderParameter] { + io.output := parameter.table.decode(io.instruction) +} diff --git a/rocketv/src/ECC.scala b/rocketv/src/ECC.scala new file mode 100644 index 000000000..0c7c5c920 --- /dev/null +++ b/rocketv/src/ECC.scala @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util._ +import chisel3.util.random.LFSR + +abstract class Decoding +{ + def uncorrected: UInt + def corrected: UInt + def correctable: Bool + def uncorrectable: Bool // If true, correctable should be ignored + def error = correctable || uncorrectable +} + +abstract class Code +{ + def canDetect: Boolean + def canCorrect: Boolean + + def width(w0: Int): Int + + /** Takes the unencoded width and returns a list of indices indicating which + * bits of the encoded value will be used for ecc + */ + def eccIndices(width: Int): Seq[Int] + + /** Encode x to a codeword suitable for decode. + * If poison is true, the decoded value will report uncorrectable + * error despite uncorrected == corrected == x. + */ + def encode(x: UInt, poison: Bool = false.B): UInt + def decode(x: UInt): Decoding + + /** Copy the bits in x to the right bit positions in an encoded word, + * so that x === decode(swizzle(x)).uncorrected; but don't generate + * the other code bits, so decode(swizzle(x)).error might be true. + * For codes for which this operation is not trivial, throw an + * UnsupportedOperationException. 
*/ + def swizzle(x: UInt): UInt +} + +class IdentityCode extends Code +{ + def canDetect = false + def canCorrect = false + + def width(w0: Int) = w0 + def eccIndices(width: Int) = Seq.empty[Int] + def encode(x: UInt, poison: Bool = false.B) = { + require (poison.isLit && poison.litValue == 0, "IdentityCode can not be poisoned") + x + } + def swizzle(x: UInt) = x + def decode(y: UInt) = new Decoding { + def uncorrected = y + def corrected = y + def correctable = false.B + def uncorrectable = false.B + } +} + +class ParityCode extends Code +{ + def canDetect = true + def canCorrect = false + + def width(w0: Int) = w0+1 + def eccIndices(w0: Int) = Seq(w0) + def encode(x: UInt, poison: Bool = false.B) = Cat(x.xorR ^ poison, x) + def swizzle(x: UInt) = Cat(false.B, x) + def decode(y: UInt) = new Decoding { + val uncorrected = y(y.getWidth-2,0) + val corrected = uncorrected + val correctable = false.B + val uncorrectable = y.xorR + } +} + +class SECCode extends Code +{ + def canDetect = true + def canCorrect = true + + // SEC codes may or may not be poisonous depending on the length + // If the code is perfect, every non-codeword is correctable + def poisonous(n: Int) = !isPow2(n+1) + + def width(k: Int) = { + val m = log2Floor(k) + 1 + k + m + (if((1 << m) < m+k+1) 1 else 0) + } + + def eccIndices(w0: Int) = { + (0 until width(w0)).collect { + case i if i >= w0 => i + } + } + + def swizzle(x: UInt) = { + val k = x.getWidth + val n = width(k) + Cat(0.U((n-k).W), x) + } + + // An (n=16, k=11) Hamming code is naturally encoded as: + // PPxPxxxPxxxxxxxP where P are parity bits and x are data + // Indexes typically start at 1, because then the P are on powers of two + // In systematic coding, you put all the data in the front: + // xxxxxxxxxxxPPPPP + // Indexes typically start at 0, because Computer Science + // For sanity when reading SRAMs, you want systematic form. 
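+  //
+  // Worked example for the smallest useful case, the (n=7, k=4) code
+  // (illustration only): impl() below maps Hamming positions 1..7 =
+  // P1 P2 d0 P4 d1 d2 d3 onto systematic bit indices 4 5 0 6 1 2 3, so the
+  // data bits land at positions 0..3 and the parity bits P1 P2 P4 at 4..6.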
+ + private def impl(n: Int, k: Int) = { + require (n >= 3 && k >= 1 && !isPow2(n)) + val hamm2sys = IndexedSeq.tabulate(n+1) { i => + if (i == 0) { + n /* undefined */ + } else if (isPow2(i)) { + k + log2Ceil(i) + } else { + i - 1 - log2Ceil(i) + } + } + val sys2hamm = hamm2sys.zipWithIndex.sortBy(_._1).map(_._2).toIndexedSeq + def syndrome(j: Int) = { + val bit = 1 << j + ("b" + Seq.tabulate(n) { i => + if ((sys2hamm(i) & bit) != 0) "1" else "0" + }.reverse.mkString).U + } + (hamm2sys, sys2hamm, syndrome _) + } + + def encode(x: UInt, poison: Bool = false.B) = { + val k = x.getWidth + val n = width(k) + val (_, _, syndrome) = impl(n, k) + + require ((poison.isLit && poison.litValue == 0) || poisonous(n), s"SEC code of length ${n} cannot be poisoned") + + /* By setting the entire syndrome on poison, the corrected bit falls off the end of the code */ + val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j)(k-1, 0) & x).xorR ^ poison }.asUInt + Cat(syndromeUInt, x) + } + + def decode(y: UInt) = new Decoding { + val n = y.getWidth + val k = n - log2Ceil(n) + val (_, sys2hamm, syndrome) = impl(n, k) + + val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j) & y).xorR }.asUInt + + val hammBadBitOH = UIntToOH(syndromeUInt, n+1) + val sysBadBitOH = VecInit.tabulate(k) { i => hammBadBitOH(sys2hamm(i)) }.asUInt + + val uncorrected = y(k-1, 0) + val corrected = uncorrected ^ sysBadBitOH + val correctable = syndromeUInt.orR + val uncorrectable = if (poisonous(n)) { syndromeUInt > n.U } else { false.B } + } +} + +class SECDEDCode extends Code +{ + def canDetect = true + def canCorrect = true + + private val sec = new SECCode + private val par = new ParityCode + + def width(k: Int) = sec.width(k)+1 + def eccIndices(w0: Int) = { + (0 until width(w0)).collect { + case i if i >= w0 => i + } + } + def encode(x: UInt, poison: Bool = false.B) = { + // toggling two bits ensures the error is uncorrectable + // to ensure corrected == uncorrected, we pick one redundant + // bit from SEC (the highest); correcting it does not affect + // corrected == uncorrected. 
The second toggled bit is the
+    // parity bit, which also does not appear in the decoding.
+    val toggle_lo = Cat(poison.asUInt, poison.asUInt)
+    val toggle_hi = toggle_lo << (sec.width(x.getWidth)-1)
+    par.encode(sec.encode(x)) ^ toggle_hi
+  }
+  def swizzle(x: UInt) = par.swizzle(sec.swizzle(x))
+  def decode(x: UInt) = new Decoding {
+    val secdec = sec.decode(x(x.getWidth-2,0))
+    val pardec = par.decode(x)
+
+    val uncorrected = secdec.uncorrected
+    val corrected = secdec.corrected
+    val correctable = pardec.uncorrectable
+    val uncorrectable = !pardec.uncorrectable && secdec.correctable
+  }
+}
+
+object ErrGen
+{
+  // generate a 1-bit error with approximate probability 2^-f
+  def apply(width: Int, f: Int): UInt = {
+    require(width > 0 && f >= 0 && log2Up(width) + f <= 16)
+    UIntToOH(LFSR(16)(log2Up(width)+f-1,0))(width-1,0)
+  }
+  def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f)
+}
+
+trait CanHaveErrors extends Bundle {
+  val correctable: Option[ValidIO[UInt]]
+  val uncorrectable: Option[ValidIO[UInt]]
+}
+
+case class ECCParams(
+  bytes: Int = 1,
+  code: Code = new IdentityCode,
+  notifyErrors: Boolean = false,
+)
+
+object Code {
+  def fromString(s: Option[String]): Code = fromString(s.getOrElse("none"))
+  def fromString(s: String): Code = s.toLowerCase match {
+    case "none" => new IdentityCode
+    case "identity" => new IdentityCode
+    case "parity" => new ParityCode
+    case "sec" => new SECCode
+    case "secded" => new SECDEDCode
+    case _ => throw new IllegalArgumentException("Unknown ECC type")
+  }
+}
\ No newline at end of file
diff --git a/rocketv/src/FPU.scala b/rocketv/src/FPU.scala
new file mode 100644
index 000000000..c980908d4
--- /dev/null
+++ b/rocketv/src/FPU.scala
@@ -0,0 +1,889 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California
+// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu
+package org.chipsalliance.rocketv
+
+import chisel3._
+import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable}
+import chisel3.experimental.{BaseModule, SerializableModule, SerializableModuleParameter}
+import chisel3.util._
+import chisel3.util.circt.ClockGate
+
+object FPUParameter {
+  implicit def rwP: upickle.default.ReadWriter[FPUParameter] = upickle.default.macroRW[FPUParameter]
+}
+
+case class FPUParameter(
+  useAsyncReset: Boolean,
+  useClockGating: Boolean,
+  xLen: Int,
+  fLen: Int,
+  minFLen: Int,
+  sfmaLatency: Int,
+  dfmaLatency: Int,
+  divSqrt: Boolean,
+  hartIdLen: Int)
+    extends SerializableModuleParameter
+
+class FPUInterface(parameter: FPUParameter) extends Bundle {
+  val clock = Input(Clock())
+  val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool())
+  val core = new FPUCoreIO(parameter.hartIdLen, parameter.xLen, parameter.fLen)
+  val cp_req = Flipped(Decoupled(new FPInput(parameter.fLen))) // the coprocessor (cp) port does not pay attention to kill signals
+  val cp_resp = Decoupled(new FPResult(parameter.fLen))
+}
+
+// TODO: could all of the hardfloat modules be replaced by DWBB?
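+// For a sense of scale, a plausible RV64FD configuration of this module
+// (illustrative values only, not a checked-in config) would be:
+//   FPUParameter(useAsyncReset = false, useClockGating = true, xLen = 64,
+//                fLen = 64, minFLen = 32, sfmaLatency = 3, dfmaLatency = 4,
+//                divSqrt = true, hartIdLen = 1)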
+@instantiable +class FPU(val parameter: FPUParameter) + extends FixedIORawModule(new FPUInterface(parameter)) + with SerializableModule[FPUParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val helper = new FPUHelper(parameter.minFLen, parameter.minFLen, parameter.xLen) + val typeTagWbOffset = helper.typeTagWbOffset + def recode(x: UInt, tag: UInt): UInt = helper.recode(x, tag) + def consistent(x: UInt): Bool = helper.consistent(x) + def unbox(x: UInt, tag: UInt, exactType: Option[FType]): UInt = helper.unbox(x, tag, exactType) + def box(x: UInt, tag: UInt) = helper.box(x, tag) + def typeTag(t: FType) = helper.typeTag(t) + def sanitizeNaN(x: UInt, t: FType) = helper.sanitizeNaN(x, t) + def maxType = helper.maxType + val fLen = parameter.fLen + val minFLen = parameter.minFLen + val floatTypes = helper.floatTypes + val S = helper.S + val D = helper.D + val H = helper.H + object cfg { + val sfmaLatency = parameter.sfmaLatency + val dfmaLatency = parameter.dfmaLatency + val divSqrt = parameter.divSqrt + } + + val useClockGating = parameter.useClockGating + val clock_en_reg = Reg(Bool()) + val clock_en = clock_en_reg || io.cp_req.valid + val gated_clock = + if (!useClockGating) io.clock + else ClockGate(io.clock, clock_en) + + // TODO: remove me. + val fp_decoder = Module(new FPUDecoder(parameter)) + fp_decoder.io.inst := io.core.inst + val id_ctrl = fp_decoder.io.sigs + + val ex_reg_valid = RegNext(io.core.valid, false.B) + val ex_reg_inst = RegEnable(io.core.inst, io.core.valid) + val ex_reg_ctrl = RegEnable(id_ctrl, io.core.valid) + val ex_ra = List.fill(3)(Reg(UInt())) + + // load response + val load_wb = RegNext(io.core.dmem_resp_val) + val load_wb_typeTag = RegEnable(io.core.dmem_resp_type(1, 0) - typeTagWbOffset, io.core.dmem_resp_val) + val load_wb_data = RegEnable(io.core.dmem_resp_data, io.core.dmem_resp_val) + val load_wb_tag = RegEnable(io.core.dmem_resp_tag, io.core.dmem_resp_val) + + class FPUImpl { // entering gated-clock domain + + val req_valid = ex_reg_valid || io.cp_req.valid + val ex_cp_valid = io.cp_req.fire + val mem_cp_valid = RegNext(ex_cp_valid, false.B) + val wb_cp_valid = RegNext(mem_cp_valid, false.B) + val mem_reg_valid = RegInit(false.B) + val killm = (io.core.killm || io.core.nack_mem) && !mem_cp_valid + // Kill X-stage instruction if M-stage is killed. This prevents it from + // speculatively being sent to the div-sqrt unit, which can cause priority + // inversion for two back-to-back divides, the first of which is killed. 
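+    // (Illustration: if an older divide is killed in M, e.g. by a D$ nack that
+    // forces a replay, a younger divide in X sent to the div-sqrt unit
+    // speculatively would claim the unit ahead of the older divide's replay.)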
+ val killx = io.core.killx || mem_reg_valid && killm + mem_reg_valid := ex_reg_valid && !killx || ex_cp_valid + val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) + val wb_reg_valid = RegNext(mem_reg_valid && (!killm || mem_cp_valid), false.B) + + val cp_ctrl = Wire(new FPUCtrlSigs) + cp_ctrl :<>= io.cp_req.bits.fpuControl + io.cp_resp.valid := false.B + io.cp_resp.bits.data := 0.U + io.cp_resp.bits.exc := DontCare + + val ex_ctrl = Mux(ex_cp_valid, cp_ctrl, ex_reg_ctrl) + val mem_ctrl = RegEnable(ex_ctrl, req_valid) + val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) + + // regfile + val regfile = Mem(32, Bits((fLen + 1).W)) + when(load_wb) { + val wdata = recode(load_wb_data, load_wb_typeTag) + regfile(load_wb_tag) := wdata + assert(consistent(wdata)) + } + + val ex_rs = ex_ra.map(a => regfile(a)) + when(io.core.valid) { + when(id_ctrl.ren1) { + when(!id_ctrl.swap12) { ex_ra(0) := io.core.inst(19, 15) } + when(id_ctrl.swap12) { ex_ra(1) := io.core.inst(19, 15) } + } + when(id_ctrl.ren2) { + when(id_ctrl.swap12) { ex_ra(0) := io.core.inst(24, 20) } + when(id_ctrl.swap23) { ex_ra(2) := io.core.inst(24, 20) } + when(!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra(1) := io.core.inst(24, 20) } + } + when(id_ctrl.ren3) { ex_ra(2) := io.core.inst(31, 27) } + } + val ex_rm = Mux(ex_reg_inst(14, 12) === 7.U, io.core.fcsr_rm, ex_reg_inst(14, 12)) + + def fuInput(minT: Option[FType]): FPInput = { + val req = Wire(new FPInput(fLen)) + val tag = ex_ctrl.typeTagIn + req.fpuControl :#= ex_ctrl + req.rm := ex_rm + req.in1 := unbox(ex_rs(0), tag, minT) + req.in2 := unbox(ex_rs(1), tag, minT) + req.in3 := unbox(ex_rs(2), tag, minT) + req.typ := ex_reg_inst(21, 20) + req.fmt := ex_reg_inst(26, 25) + req.fmaCmd := ex_reg_inst(3, 2) | (!ex_ctrl.ren3 && ex_reg_inst(27)) + when(ex_cp_valid) { + req := io.cp_req.bits + when(io.cp_req.bits.fpuControl.swap23) { + req.in2 := io.cp_req.bits.in3 + req.in3 := io.cp_req.bits.in2 + } + } + req + } + + val sfma = Instantiate( + new FPUFMAPipe( + FPUFMAPipeParameter( + parameter.useAsyncReset, + parameter.sfmaLatency, + parameter.xLen, + parameter.fLen, + parameter.minFLen, + FType.S + ) + ) + ) + sfma.io.clock := io.clock + sfma.io.reset := io.reset + sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === S + sfma.io.in.bits := fuInput(Some(FType.S /*sfma.t*/ )) + + val fpiu = Instantiate( + new FPToInt( + FPToIntParameter( + parameter.useAsyncReset, + parameter.xLen, + parameter.fLen, + parameter.minFLen + ) + ) + ) + fpiu.io.clock := io.clock + fpiu.io.reset := io.reset + fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags)) + fpiu.io.in.bits := fuInput(None) + io.core.store_data := fpiu.io.out.bits.store + io.core.toint_data := fpiu.io.out.bits.toint + when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint) { + io.cp_resp.bits.data := fpiu.io.out.bits.toint + io.cp_resp.valid := true.B + } + + val ifpu = Instantiate( + new IntToFP( + IntToFPParameter( + parameter.useAsyncReset, + 2, + parameter.fLen, + parameter.xLen, + parameter.minFLen + ) + ) + ) + ifpu.io.clock := io.clock + ifpu.io.reset := io.reset + ifpu.io.in.valid := req_valid && ex_ctrl.fromint + ifpu.io.in.bits := fpiu.io.in.bits + ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.core.fromint_data) + + val fpmu = Instantiate( + new FPToFP( + FPToFPParameter( + parameter.useAsyncReset, + 2, + parameter.xLen, + parameter.fLen, + parameter.minFLen + ) + ) + ) + fpmu.io.clock := io.clock + fpmu.io.reset := 
io.reset + fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe + fpmu.io.in.bits := fpiu.io.in.bits + fpmu.io.lt := fpiu.io.out.bits.lt + + val divSqrt_wen = WireDefault(false.B) + val divSqrt_inFlight = WireDefault(false.B) + val divSqrt_waddr = Reg(UInt(5.W)) + val divSqrt_typeTag = Wire(UInt(log2Ceil(floatTypes.size).W)) + val divSqrt_wdata = Wire(UInt((parameter.fLen + 1).W)) + val divSqrt_flags = Wire(UInt(FPConstants.FLAGS_SZ.W)) + divSqrt_typeTag := DontCare + divSqrt_wdata := DontCare + divSqrt_flags := DontCare + // writeback arbitration + case class Pipe[T <: BaseModule](p: Instance[T], lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult) + val dfma = Option.when(fLen > 32)( + Instantiate( + new FPUFMAPipe( + FPUFMAPipeParameter( + parameter.useAsyncReset, + parameter.dfmaLatency, + parameter.xLen, + parameter.fLen, + parameter.minFLen, + FType.D + ) + ) + ) + ) + val hfma = Option.when(minFLen == 16)( + Instantiate( + new FPUFMAPipe( + FPUFMAPipeParameter( + parameter.useAsyncReset, + parameter.sfmaLatency, + parameter.xLen, + parameter.fLen, + parameter.minFLen, + FType.H + ) + ) + ) + ) + dfma.foreach { dfma => + dfma.io.clock := io.clock + dfma.io.reset := io.reset + dfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === D + dfma.io.in.bits := fuInput(Some(FType.D /*dfma.t*/ )) + } + hfma.foreach { hfma => + hfma.io.clock := io.clock + hfma.io.reset := io.reset + hfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === H + hfma.io.in.bits := fuInput(Some(FType.H /*hfma.t*/ )) + } + val pipes = List( + Pipe(fpmu, 2, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits), + Pipe(ifpu, 2, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits), + Pipe(sfma, cfg.sfmaLatency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === S, sfma.io.out.bits) + ) ++ + dfma.map(dfma => + Pipe(dfma, cfg.dfmaLatency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === D, dfma.io.out.bits) + ) ++ + hfma.map(hfma => Pipe(hfma, cfg.sfmaLatency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === H, hfma.io.out.bits)) + def latencyMask(c: FPUCtrlSigs, offset: Int) = { + require(pipes.forall(_.lat >= offset)) + pipes.map(p => Mux(p.cond(c), (1 << p.lat - offset).U, 0.U)).reduce(_ | _) + } + def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), p._2.U, 0.U)).reduce(_ | _) + val maxLatency = pipes.map(_.lat).max + val memLatencyMask = latencyMask(mem_ctrl, 2) + + class WBInfo extends Bundle { + val rd = UInt(5.W) + val typeTag = UInt(log2Ceil(floatTypes.size).W) + val cp = Bool() + val pipeid = UInt(log2Ceil(pipes.size).W) + } + + val wen = RegInit(0.U((maxLatency - 1).W)) + val wbInfo = Reg(Vec(maxLatency - 1, new WBInfo)) + val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) + val write_port_busy = RegEnable( + mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, + req_valid + ) + + for (i <- 0 until maxLatency - 2) { + when(wen(i + 1)) { wbInfo(i) := wbInfo(i + 1) } + } + wen := wen >> 1 + when(mem_wen) { + when(!killm) { + wen := wen >> 1 | memLatencyMask + } + for (i <- 0 until maxLatency - 1) { + when(!write_port_busy && memLatencyMask(i)) { + wbInfo(i).cp := mem_cp_valid + wbInfo(i).typeTag := mem_ctrl.typeTagOut + wbInfo(i).pipeid := pipeid(mem_ctrl) + wbInfo(i).rd := mem_reg_inst(11, 7) + } + } + } + + val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd) + val wtypeTag = Mux(divSqrt_wen, divSqrt_typeTag, wbInfo(0).typeTag) + val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, 
VecInit(pipes.map(_.res.data))(wbInfo(0).pipeid)), wtypeTag) + val wexc = VecInit(pipes.map(_.res.exc))(wbInfo(0).pipeid) + when((!wbInfo(0).cp && wen(0)) || divSqrt_wen) { + assert(consistent(wdata)) + regfile(waddr) := wdata + } + + when(wbInfo(0).cp && wen(0)) { + io.cp_resp.bits.data := wdata + io.cp_resp.valid := true.B + } + io.cp_req.ready := !ex_reg_valid + + val wb_toint_valid = wb_reg_valid && wb_ctrl.toint + val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) + io.core.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) + io.core.fcsr_flags.bits := + Mux(wb_toint_valid, wb_toint_exc, 0.U) | + Mux(divSqrt_wen, divSqrt_flags, 0.U) | + Mux(wen(0), wexc, 0.U) + + val divSqrt_write_port_busy = (mem_ctrl.div || mem_ctrl.sqrt) && wen.orR + io.core.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_inFlight) + io.core.nack_mem := write_port_busy || divSqrt_write_port_busy || divSqrt_inFlight + io.core.dec <> fp_decoder.io.sigs + def useScoreboard(f: ((Pipe[_], Int)) => Bool) = + pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(false.B)(_ || _) + io.core.sboard_set := wb_reg_valid && !wb_cp_valid && RegNext( + useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt + ) + io.core.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === x._2.U))) + io.core.sboard_clra := waddr + + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + // we don't currently support round-max-magnitude (rm=4) + io.core.illegal_rm := isOneOf(io.core.inst(14, 12), Seq(5.U, 6.U)) || io.core.inst( + 14, + 12 + ) === 7.U && io.core.fcsr_rm >= 5.U + + if (cfg.divSqrt) { + val divSqrt_inValid = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_inFlight + val divSqrt_killed = RegNext(divSqrt_inValid && killm, true.B) + when(divSqrt_inValid) { + divSqrt_waddr := mem_reg_inst(11, 7) + } + + for (t <- floatTypes) { + val tag = mem_ctrl.typeTagOut + val divSqrt = withReset(divSqrt_killed) { Module(new hardfloat.DivSqrtRecFN_small(t.exp, t.sig, 0)) } + divSqrt.io.inValid := divSqrt_inValid && tag === typeTag(t).U + divSqrt.io.sqrtOp := mem_ctrl.sqrt + divSqrt.io.a := maxType.unsafeConvert(fpiu.io.out.bits.in.in1, t) + divSqrt.io.b := maxType.unsafeConvert(fpiu.io.out.bits.in.in2, t) + divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm + divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding + + when(!divSqrt.io.inReady) { divSqrt_inFlight := true.B } // only 1 in flight + + when(divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt) { + divSqrt_wen := !divSqrt_killed + divSqrt_wdata := sanitizeNaN(divSqrt.io.out, t) + divSqrt_flags := divSqrt.io.exceptionFlags + divSqrt_typeTag := typeTag(t).U + } + } + + when(divSqrt_killed) { divSqrt_inFlight := false.B } + } else { + when(id_ctrl.div || id_ctrl.sqrt) { io.core.illegal_rm := true.B } + } + + // gate the clock + clock_en_reg := !useClockGating.B || + io.core.keep_clock_enabled || // chicken bit + io.core.valid || // ID stage + req_valid || // EX stage + mem_reg_valid || mem_cp_valid || // MEM stage + wb_reg_valid || wb_cp_valid || // WB stage + wen.orR || divSqrt_inFlight || // post-WB stage + io.core.dmem_resp_val // load writeback + + } // leaving gated-clock domain + val fpuImpl = withClockAndReset(gated_clock, io.reset) { new FPUImpl } +} + +class FPUDecoderInterface(parameter: FPUParameter) extends Bundle { + val inst = Input(UInt(32.W)) + val sigs = 
Output(new FPUCtrlSigs())
+}
+
+// TODO: we should eliminate this module and move it to CoreDecoder with optional fields.
+class FPUDecoder(val parameter: FPUParameter)
+    extends FixedIORawModule(new FPUDecoderInterface(parameter))
+    with SerializableModule[FPUParameter] {
+  val X2 = BitPat.dontCare(2)
+  val X = BitPat.dontCare(1)
+  val N = BitPat.N()
+  val Y = BitPat.Y()
+  val helper = new FPUHelper(parameter.minFLen, parameter.minFLen, parameter.xLen)
+  // TODO: FPUHelper takes (minFLen, fLen, xLen) but is passed minFLen twice, so the
+  // H/I/D/S tags below all collapse to the same value; benign only when fLen == minFLen.
+  def H = BitPat(helper.H)
+  def I = BitPat(helper.I)
+  def D = BitPat(helper.D)
+  def S = BitPat(helper.S)
+
+  def FADD_D = BitPat("b0000001??????????????????1010011")
+  def FADD_H = BitPat("b0000010??????????????????1010011")
+  def FADD_S = BitPat("b0000000??????????????????1010011")
+  def FCLASS_D = BitPat("b111000100000?????001?????1010011")
+  def FCLASS_H = BitPat("b111001000000?????001?????1010011")
+  def FCLASS_S = BitPat("b111000000000?????001?????1010011")
+  def FCVT_D_H = BitPat("b010000100010?????????????1010011")
+  def FCVT_D_L = BitPat("b110100100010?????????????1010011")
+  def FCVT_D_LU = BitPat("b110100100011?????????????1010011")
+  def FCVT_D_S = BitPat("b010000100000?????????????1010011")
+  def FCVT_D_W = BitPat("b110100100000?????????????1010011")
+  def FCVT_D_WU = BitPat("b110100100001?????????????1010011")
+  def FCVT_H_D = BitPat("b010001000001?????????????1010011")
+  def FCVT_H_L = BitPat("b110101000010?????????????1010011")
+  def FCVT_H_LU = BitPat("b110101000011?????????????1010011")
+  def FCVT_H_S = BitPat("b010001000000?????????????1010011")
+  def FCVT_H_W = BitPat("b110101000000?????????????1010011")
+  def FCVT_H_WU = BitPat("b110101000001?????????????1010011")
+  def FCVT_L_D = BitPat("b110000100010?????????????1010011")
+  def FCVT_L_H = BitPat("b110001000010?????????????1010011")
+  def FCVT_L_S = BitPat("b110000000010?????????????1010011")
+  def FCVT_LU_D = BitPat("b110000100011?????????????1010011")
+  def FCVT_LU_H = BitPat("b110001000011?????????????1010011")
+  def FCVT_LU_S = BitPat("b110000000011?????????????1010011")
+  def FCVT_S_D = BitPat("b010000000001?????????????1010011")
+  def FCVT_S_H = BitPat("b010000000010?????????????1010011")
+  def FCVT_S_L = BitPat("b110100000010?????????????1010011")
+  def FCVT_S_LU = BitPat("b110100000011?????????????1010011")
+  def FCVT_S_W = BitPat("b110100000000?????????????1010011")
+  def FCVT_S_WU = BitPat("b110100000001?????????????1010011")
+  def FCVT_W_D = BitPat("b110000100000?????????????1010011")
+  def FCVT_W_H = BitPat("b110001000000?????????????1010011")
+  def FCVT_W_S = BitPat("b110000000000?????????????1010011")
+  def FCVT_WU_D = BitPat("b110000100001?????????????1010011")
+  def FCVT_WU_H = BitPat("b110001000001?????????????1010011")
+  def FCVT_WU_S = BitPat("b110000000001?????????????1010011")
+  def FDIV_D = BitPat("b0001101??????????????????1010011")
+  def FDIV_H = BitPat("b0001110??????????????????1010011")
+  def FDIV_S = BitPat("b0001100??????????????????1010011")
+  def FEQ_D = BitPat("b1010001??????????010?????1010011")
+  def FEQ_H = BitPat("b1010010??????????010?????1010011")
+  def FEQ_S = BitPat("b1010000??????????010?????1010011")
+  def FLD = BitPat("b?????????????????011?????0000111")
+  def FLE_D = BitPat("b1010001??????????000?????1010011")
+  def FLE_H = BitPat("b1010010??????????000?????1010011")
+  def FLE_S = BitPat("b1010000??????????000?????1010011")
+  def FLH = BitPat("b?????????????????001?????0000111")
+  def FLT_D = BitPat("b1010001??????????001?????1010011")
+  def FLT_H = BitPat("b1010010??????????001?????1010011")
+  def 
FLT_S = BitPat("b1010000??????????001?????1010011") + def FLW = BitPat("b?????????????????010?????0000111") + def FMADD_D = BitPat("b?????01??????????????????1000011") + def FMADD_H = BitPat("b?????10??????????????????1000011") + def FMADD_S = BitPat("b?????00??????????????????1000011") + def FMAX_D = BitPat("b0010101??????????001?????1010011") + def FMAX_H = BitPat("b0010110??????????001?????1010011") + def FMAX_S = BitPat("b0010100??????????001?????1010011") + def FMIN_D = BitPat("b0010101??????????000?????1010011") + def FMIN_H = BitPat("b0010110??????????000?????1010011") + def FMIN_S = BitPat("b0010100??????????000?????1010011") + def FMSUB_D = BitPat("b?????01??????????????????1000111") + def FMSUB_H = BitPat("b?????10??????????????????1000111") + def FMSUB_S = BitPat("b?????00??????????????????1000111") + def FMUL_D = BitPat("b0001001??????????????????1010011") + def FMUL_H = BitPat("b0001010??????????????????1010011") + def FMUL_S = BitPat("b0001000??????????????????1010011") + def FMV_D_X = BitPat("b111100100000?????000?????1010011") + def FMV_H_X = BitPat("b111101000000?????000?????1010011") + def FMV_W_X = BitPat("b111100000000?????000?????1010011") + def FMV_X_D = BitPat("b111000100000?????000?????1010011") + def FMV_X_H = BitPat("b111001000000?????000?????1010011") + def FMV_X_W = BitPat("b111000000000?????000?????1010011") + def FNMADD_D = BitPat("b?????01??????????????????1001111") + def FNMADD_H = BitPat("b?????10??????????????????1001111") + def FNMADD_S = BitPat("b?????00??????????????????1001111") + def FNMSUB_D = BitPat("b?????01??????????????????1001011") + def FNMSUB_H = BitPat("b?????10??????????????????1001011") + def FNMSUB_S = BitPat("b?????00??????????????????1001011") + def FSD = BitPat("b?????????????????011?????0100111") + def FSGNJ_D = BitPat("b0010001??????????000?????1010011") + def FSGNJ_H = BitPat("b0010010??????????000?????1010011") + def FSGNJ_S = BitPat("b0010000??????????000?????1010011") + def FSGNJN_D = BitPat("b0010001??????????001?????1010011") + def FSGNJN_H = BitPat("b0010010??????????001?????1010011") + def FSGNJN_S = BitPat("b0010000??????????001?????1010011") + def FSGNJX_D = BitPat("b0010001??????????010?????1010011") + def FSGNJX_H = BitPat("b0010010??????????010?????1010011") + def FSGNJX_S = BitPat("b0010000??????????010?????1010011") + def FSH = BitPat("b?????????????????001?????0100111") + def FSQRT_D = BitPat("b010110100000?????????????1010011") + def FSQRT_H = BitPat("b010111000000?????????????1010011") + def FSQRT_S = BitPat("b010110000000?????????????1010011") + def FSUB_D = BitPat("b0000101??????????????????1010011") + def FSUB_H = BitPat("b0000110??????????????????1010011") + def FSUB_S = BitPat("b0000100??????????????????1010011") + def FSW = BitPat("b?????????????????010?????0100111") + + val default = List(X, X, X, X, X, X, X, X2, X2, X, X, X, X, X, X, X) + val h: Array[(BitPat, List[BitPat])] = + Array( + FLH -> List(Y, Y, N, N, N, X, X, X2, X2, N, N, N, N, N, N, N), + FSH -> List(Y, N, N, Y, N, Y, X, I, H, N, Y, N, N, N, N, N), + FMV_H_X -> List(N, Y, N, N, N, X, X, H, I, Y, N, N, N, N, N, N), + FCVT_H_W -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FCVT_H_WU -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FCVT_H_L -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FCVT_H_LU -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FMV_X_H -> List(N, N, Y, N, N, N, X, I, H, N, Y, N, N, N, N, N), + FCLASS_H -> List(N, N, Y, N, N, N, X, H, H, N, Y, N, N, N, N, N), + FCVT_W_H -> List(N, N, Y, N, 
N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_WU_H -> List(N, N, Y, N, N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_L_H -> List(N, N, Y, N, N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_LU_H -> List(N, N, Y, N, N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_S_H -> List(N, Y, Y, N, N, N, X, H, S, N, N, Y, N, N, N, Y), + FCVT_H_S -> List(N, Y, Y, N, N, N, X, S, H, N, N, Y, N, N, N, Y), + FEQ_H -> List(N, N, Y, Y, N, N, N, H, H, N, Y, N, N, N, N, Y), + FLT_H -> List(N, N, Y, Y, N, N, N, H, H, N, Y, N, N, N, N, Y), + FLE_H -> List(N, N, Y, Y, N, N, N, H, H, N, Y, N, N, N, N, Y), + FSGNJ_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, N), + FSGNJN_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, N), + FSGNJX_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, N), + FMIN_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, Y), + FMAX_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, Y), + FADD_H -> List(N, Y, Y, Y, N, N, Y, H, H, N, N, N, Y, N, N, Y), + FSUB_H -> List(N, Y, Y, Y, N, N, Y, H, H, N, N, N, Y, N, N, Y), + FMUL_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, N, Y, N, N, Y), + FMADD_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FMSUB_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FNMADD_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FNMSUB_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FDIV_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, N, N, Y, N, Y), + FSQRT_H -> List(N, Y, Y, N, N, N, X, H, H, N, N, N, N, N, Y, Y) + ) + val f: Array[(BitPat, List[BitPat])] = + Array( + FLW -> List(Y, Y, N, N, N, X, X, X2, X2, N, N, N, N, N, N, N), + FSW -> List(Y, N, N, Y, N, Y, X, I, S, N, Y, N, N, N, N, N), + FMV_W_X -> List(N, Y, N, N, N, X, X, S, I, Y, N, N, N, N, N, N), + FCVT_S_W -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FCVT_S_WU -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FCVT_S_L -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FCVT_S_LU -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FMV_X_W -> List(N, N, Y, N, N, N, X, I, S, N, Y, N, N, N, N, N), + FCLASS_S -> List(N, N, Y, N, N, N, X, S, S, N, Y, N, N, N, N, N), + FCVT_W_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FCVT_WU_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FCVT_L_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FCVT_LU_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FEQ_S -> List(N, N, Y, Y, N, N, N, S, S, N, Y, N, N, N, N, Y), + FLT_S -> List(N, N, Y, Y, N, N, N, S, S, N, Y, N, N, N, N, Y), + FLE_S -> List(N, N, Y, Y, N, N, N, S, S, N, Y, N, N, N, N, Y), + FSGNJ_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, N), + FSGNJN_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, N), + FSGNJX_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, N), + FMIN_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, Y), + FMAX_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, Y), + FADD_S -> List(N, Y, Y, Y, N, N, Y, S, S, N, N, N, Y, N, N, Y), + FSUB_S -> List(N, Y, Y, Y, N, N, Y, S, S, N, N, N, Y, N, N, Y), + FMUL_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, N, Y, N, N, Y), + FMADD_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FMSUB_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FNMADD_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FNMSUB_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FDIV_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, N, N, Y, N, Y), + FSQRT_S -> List(N, Y, Y, N, N, N, X, S, 
S, N, N, N, N, N, Y, Y) + ) + val d: Array[(BitPat, List[BitPat])] = + Array( + FLD -> List(Y, Y, N, N, N, X, X, X2, X2, N, N, N, N, N, N, N), + FSD -> List(Y, N, N, Y, N, Y, X, I, D, N, Y, N, N, N, N, N), + FMV_D_X -> List(N, Y, N, N, N, X, X, D, I, Y, N, N, N, N, N, N), + FCVT_D_W -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FCVT_D_WU -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FCVT_D_L -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FCVT_D_LU -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FMV_X_D -> List(N, N, Y, N, N, N, X, I, D, N, Y, N, N, N, N, N), + FCLASS_D -> List(N, N, Y, N, N, N, X, D, D, N, Y, N, N, N, N, N), + FCVT_W_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_WU_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_L_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_LU_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_S_D -> List(N, Y, Y, N, N, N, X, D, S, N, N, Y, N, N, N, Y), + FCVT_D_S -> List(N, Y, Y, N, N, N, X, S, D, N, N, Y, N, N, N, Y), + FEQ_D -> List(N, N, Y, Y, N, N, N, D, D, N, Y, N, N, N, N, Y), + FLT_D -> List(N, N, Y, Y, N, N, N, D, D, N, Y, N, N, N, N, Y), + FLE_D -> List(N, N, Y, Y, N, N, N, D, D, N, Y, N, N, N, N, Y), + FSGNJ_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, N), + FSGNJN_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, N), + FSGNJX_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, N), + FMIN_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, Y), + FMAX_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, Y), + FADD_D -> List(N, Y, Y, Y, N, N, Y, D, D, N, N, N, Y, N, N, Y), + FSUB_D -> List(N, Y, Y, Y, N, N, Y, D, D, N, N, N, Y, N, N, Y), + FMUL_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, N, Y, N, N, Y), + FMADD_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FMSUB_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FNMADD_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FNMSUB_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FDIV_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, N, N, Y, N, Y), + FSQRT_D -> List(N, Y, Y, N, N, N, X, D, D, N, N, N, N, N, Y, Y) + ) + val fcvt_hd: Array[(BitPat, List[BitPat])] = + Array( + FCVT_H_D -> List(N, Y, Y, N, N, N, X, D, H, N, N, Y, N, N, N, Y), + FCVT_D_H -> List(N, Y, Y, N, N, N, X, H, D, N, N, Y, N, N, N, Y) + ) + + val insns = (parameter.minFLen, parameter.fLen) match { + case (32, 32) => f + case (16, 32) => h ++ f + case (32, 64) => f ++ d + case (16, 64) => h ++ f ++ d ++ fcvt_hd + case other => + throw new Exception(s"minFLen = ${parameter.minFLen} & fLen = ${parameter.fLen} is an unsupported configuration") + } + val decoder = DecodeLogic(io.inst, default, insns) + val s = io.sigs + val sigs = Seq( + s.ldst, + s.wen, + s.ren1, + s.ren2, + s.ren3, + s.swap12, + s.swap23, + s.typeTagIn, + s.typeTagOut, + s.fromint, + s.toint, + s.fastpipe, + s.fma, + s.div, + s.sqrt, + s.wflags + ) + sigs.zip(decoder).foreach { case (s, d) => s := d } +} + +object FType { + implicit def rwP: upickle.default.ReadWriter[FType] = upickle.default.macroRW[FType] + + val H = FType(5, 11) + val S = FType(8, 24) + val D = FType(11, 53) + + val all = List(H, S, D) +} + +case class FType(exp: Int, sig: Int) { + def ieeeWidth = exp + sig + def recodedWidth = ieeeWidth + 1 + + def ieeeQNaN = ((BigInt(1) << (ieeeWidth - 1)) - (BigInt(1) << (sig - 2))).U(ieeeWidth.W) + def qNaN = ((BigInt(7) << (exp + sig - 3)) + (BigInt(1) << (sig - 
2))).U(recodedWidth.W) + def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR + def isSNaN(x: UInt) = isNaN(x) && !x(sig - 2) + + def classify(x: UInt) = { + val sign = x(sig + exp) + val code = x(exp + sig - 1, exp + sig - 3) + val codeHi = code(2, 1) + val isSpecial = codeHi === 3.U + + val isHighSubnormalIn = x(exp + sig - 3, sig - 1) < 2.U + val isSubnormal = code === 1.U || codeHi === 1.U && isHighSubnormalIn + val isNormal = codeHi === 1.U && !isHighSubnormalIn || codeHi === 2.U + val isZero = code === 0.U + val isInf = isSpecial && !code(0) + val isNaN = code.andR + val isSNaN = isNaN && !x(sig - 2) + val isQNaN = isNaN && x(sig - 2) + + Cat( + isQNaN, + isSNaN, + isInf && !sign, + isNormal && !sign, + isSubnormal && !sign, + isZero && !sign, + isZero && sign, + isSubnormal && sign, + isNormal && sign, + isInf && sign + ) + } + + // convert between formats, ignoring rounding, range, NaN + def unsafeConvert(x: UInt, to: FType) = if (this == to) x + else { + val sign = x(sig + exp) + val fractIn = x(sig - 2, 0) + val expIn = x(sig + exp - 1, sig - 1) + val fractOut = fractIn << to.sig >> sig + val expOut = { + val expCode = expIn(exp, exp - 2) + val commonCase = (expIn + (1 << to.exp).U) - (1 << exp).U + Mux(expCode === 0.U || expCode >= 6.U, Cat(expCode, commonCase(to.exp - 3, 0)), commonCase(to.exp, 0)) + } + Cat(sign, expOut, fractOut) + } + + private def ieeeBundle = { + val expWidth = exp + class IEEEBundle extends Bundle { + val sign = Bool() + val exp = UInt(expWidth.W) + val sig = UInt((ieeeWidth - expWidth - 1).W) + } + new IEEEBundle + } + + def unpackIEEE(x: UInt) = x.asTypeOf(ieeeBundle) + + def recode(x: UInt) = hardfloat.recFNFromFN(exp, sig, x) + def ieee(x: UInt) = hardfloat.fNFromRecFN(exp, sig, x) +} + +// TODO: migrate into FPUParameter +class FPUHelper(minFLen: Int, fLen: Int, xLen: Int) { + require(fLen == 0 || FType.all.exists(_.ieeeWidth == fLen)) + val minXLen = 32 + val nIntTypes = log2Ceil(xLen / minXLen) + 1 + def floatTypes = FType.all.filter(t => minFLen <= t.ieeeWidth && t.ieeeWidth <= fLen) + def minType = floatTypes.head + def maxType = floatTypes.last + def prevType(t: FType) = floatTypes(typeTag(t) - 1) + def maxExpWidth = maxType.exp + def maxSigWidth = maxType.sig + def typeTag(t: FType) = floatTypes.indexOf(t) + def typeTagWbOffset = (FType.all.indexOf(minType) + 1).U + def typeTagGroup(t: FType) = (if (floatTypes.contains(t)) typeTag(t) else typeTag(maxType)).U + // typeTag + def H = typeTagGroup(FType.H) + def S = typeTagGroup(FType.S) + def D = typeTagGroup(FType.D) + def I = typeTag(maxType).U + + private def isBox(x: UInt, t: FType): Bool = x(t.sig + t.exp, t.sig + t.exp - 4).andR + + private def box(x: UInt, xt: FType, y: UInt, yt: FType): UInt = { + require(xt.ieeeWidth == 2 * yt.ieeeWidth) + val swizzledNaN = Cat( + x(xt.sig + xt.exp, xt.sig + xt.exp - 3), + x(xt.sig - 2, yt.recodedWidth - 1).andR, + x(xt.sig + xt.exp - 5, xt.sig), + y(yt.recodedWidth - 2), + x(xt.sig - 2, yt.recodedWidth - 1), + y(yt.recodedWidth - 1), + y(yt.recodedWidth - 3, 0) + ) + Mux(xt.isNaN(x), swizzledNaN, x) + } + + // implement NaN unboxing for FU inputs + def unbox(x: UInt, tag: UInt, exactType: Option[FType]): UInt = { + val outType = exactType.getOrElse(maxType) + def helper(x: UInt, t: FType): Seq[(Bool, UInt)] = { + val prev = + if (t == minType) { + Seq() + } else { + val prevT = prevType(t) + val unswizzled = Cat(x(prevT.sig + prevT.exp - 1), x(t.sig - 1), x(prevT.sig + prevT.exp - 2, 0)) + val prev = helper(unswizzled, prevT) + val isbox = isBox(x, 
t) + prev.map(p => (isbox && p._1, p._2)) + } + prev :+ (true.B, t.unsafeConvert(x, outType)) + } + + val (oks, floats) = helper(x, maxType).unzip + if (exactType.isEmpty || floatTypes.size == 1) { + Mux(VecInit(oks)(tag), VecInit(floats)(tag), maxType.qNaN) + } else { + val t = exactType.get + floats(typeTag(t)) | Mux(oks(typeTag(t)), 0.U, t.qNaN) + } + } + + // make sure that the redundant bits in the NaN-boxed encoding are consistent + def consistent(x: UInt): Bool = { + def helper(x: UInt, t: FType): Bool = if (typeTag(t) == 0) true.B + else { + val prevT = prevType(t) + val unswizzled = Cat(x(prevT.sig + prevT.exp - 1), x(t.sig - 1), x(prevT.sig + prevT.exp - 2, 0)) + val prevOK = !isBox(x, t) || helper(unswizzled, prevT) + val curOK = !t.isNaN(x) || x(t.sig + t.exp - 4) === x(t.sig - 2, prevT.recodedWidth - 1).andR + prevOK && curOK + } + helper(x, maxType) + } + + // generate a NaN box from an FU result + def box(x: UInt, t: FType): UInt = { + if (t == maxType) { + x + } else { + val nt = floatTypes(typeTag(t) + 1) + val bigger = box(((BigInt(1) << nt.recodedWidth) - 1).U, nt, x, t) + bigger | ((BigInt(1) << maxType.recodedWidth) - (BigInt(1) << nt.recodedWidth)).U + } + } + + // generate a NaN box from an FU result + def box(x: UInt, tag: UInt): UInt = { + val opts = floatTypes.map(t => box(x, t)) + VecInit(opts)(tag) + } + + // zap bits that hardfloat thinks are don't-cares, but we do care about + def sanitizeNaN(x: UInt, t: FType): UInt = { + if (typeTag(t) == 0) { + x + } else { + val maskedNaN = x & ~((BigInt(1) << (t.sig - 1)) | (BigInt(1) << (t.sig + t.exp - 4))).U(t.recodedWidth.W) + Mux(t.isNaN(x), maskedNaN, x) + } + } + + // implement NaN boxing and recoding for FL*/fmv.*.x + def recode(x: UInt, tag: UInt): UInt = { + def helper(x: UInt, t: FType): UInt = { + if (typeTag(t) == 0) { + t.recode(x) + } else { + val prevT = prevType(t) + box(t.recode(x), t, helper(x, prevT), prevT) + } + } + + // fill MSBs of subword loads to emulate a wider load of a NaN-boxed value + val boxes = floatTypes.map(t => ((BigInt(1) << maxType.ieeeWidth) - (BigInt(1) << t.ieeeWidth)).U) + helper(VecInit(boxes)(tag) | x, maxType) + } + + // implement NaN unboxing and un-recoding for FS*/fmv.x.* + def ieee(x: UInt, t: FType = maxType): UInt = { + if (typeTag(t) == 0) { + t.ieee(x) + } else { + val unrecoded = t.ieee(x) + val prevT = prevType(t) + val prevRecoded = Cat(x(prevT.recodedWidth - 2), x(t.sig - 1), x(prevT.recodedWidth - 3, 0)) + val prevUnrecoded = ieee(prevRecoded, prevT) + Cat(unrecoded >> prevT.ieeeWidth, Mux(t.isNaN(x), prevUnrecoded, unrecoded(prevT.ieeeWidth - 1, 0))) + } + } +} diff --git a/rocketv/src/FetchQueue.scala b/rocketv/src/FetchQueue.scala new file mode 100644 index 000000000..c1ad35fc1 --- /dev/null +++ b/rocketv/src/FetchQueue.scala @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu + +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +object FetchQueueParameter { + implicit def rwP: upickle.default.ReadWriter[FetchQueueParameter] = upickle.default.macroRW[FetchQueueParameter] +} + +case class FetchQueueParameter( + useAsyncReset: Boolean, + entries: Int, + vaddrBits: Int, + respEntries: Int, + bhtHistoryLength: Option[Int], + 
bhtCounterLength: Option[Int], + vaddrBitsExtended: Int, + coreInstBits: Int, + fetchWidth: Int) + extends SerializableModuleParameter { + def gen = new FrontendResp( + vaddrBits, + respEntries, + bhtHistoryLength, + bhtCounterLength, + vaddrBitsExtended, + coreInstBits, + fetchWidth + ) +} + +class FetchQueueInterface(parameter: FetchQueueParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val enq = Flipped(Decoupled(parameter.gen)) + val deq = Decoupled(parameter.gen) + val mask = Output(UInt(parameter.entries.W)) +} + +@instantiable +class FetchQueue(val parameter: FetchQueueParameter) + extends FixedIORawModule(new FetchQueueInterface(parameter)) + with SerializableModule[FetchQueueParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + private val valid = RegInit(VecInit(Seq.fill(parameter.entries) { false.B })) + private val elts = Reg(Vec(parameter.entries, parameter.gen)) + + for (i <- 0 until parameter.entries) { + def paddedValid(i: Int) = if (i == -1) true.B else if (i == parameter.entries) false.B else valid(i) + + val flow = true + val wdata = if (i == parameter.entries - 1) io.enq.bits else Mux(valid(i + 1), elts(i + 1), io.enq.bits) + val wen = + Mux( + io.deq.ready, + paddedValid(i + 1) || io.enq.fire && valid(i), + io.enq.fire && paddedValid(i - 1) && !valid(i) + ) + when(wen) { elts(i) := wdata } + + valid(i) := + Mux( + io.deq.ready, + paddedValid(i + 1) || io.enq.fire && ((i == 0 && !flow).B || valid(i)), + io.enq.fire && paddedValid(i - 1) || valid(i) + ) + } + + io.enq.ready := !valid(parameter.entries - 1) + io.deq.valid := valid(0) + io.deq.bits := elts.head + + when(io.enq.valid) { io.deq.valid := true.B } + when(!valid(0)) { io.deq.bits := io.enq.bits } + + io.mask := valid.asUInt +} diff --git a/rocketv/src/Frontend.scala b/rocketv/src/Frontend.scala new file mode 100644 index 000000000..36a313d84 --- /dev/null +++ b/rocketv/src/Frontend.scala @@ -0,0 +1,620 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +import chisel3.util.circt.ClockGate +import chisel3.util.experimental.BitSet +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} + +object FrontendParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if(str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit def rwP: upickle.default.ReadWriter[FrontendParameter] = upickle.default.macroRW[FrontendParameter] +} + +case class FrontendParameter( + // must be false, since resetVector will be aligned here. 
+ useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingBTB: Boolean, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + blockBytes: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + nPMPs: Int, + paddrBits: Int, + pgLevels: Int, + asidBits: Int, + bhtParameter: Option[BHTParameter], + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) extends SerializableModuleParameter { + // static now + def hasCorrectable: Boolean = false + def usingHypervisor: Boolean = false + def hasUncorrectable: Boolean = false + def usingAtomicsOnlyForIO: Boolean = false + def itimParameter: Option[AXI4BundleParameter] = None + + // calculate + def bhtHistoryLength: Option[Int] = bhtParameter.map(_.historyLength) + def bhtCounterLength: Option[Int] = bhtParameter.map(_.counterLength) + def usingAtomicsInCache: Boolean = usingAtomics && !usingAtomicsOnlyForIO + private def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + (if (usingHypervisor) 1 else 0) else 0) + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + def maxHypervisorExtraAddrBits: Int = 2 + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1).min(xLen) + } + def entries: Int = btbEntries + def coreInstBits: Int = if (usingCompressed) 16 else 32 + def vpnBits: Int = vaddrBits - pgIdxBits + def maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } + def pgIdxBits: Int = 12 + val fetchWidth: Int = if (usingCompressed) 2 else 1 + def fetchBytes: Int = 4 + val coreInstBytes = (if (usingCompressed) 16 else 32) / 8 + def resetVectorBits: Int = paddrBits + def pmaCheckerParameter: PMACheckerParameter = PMACheckerParameter( + paddrBits = paddrBits, + legal = legal, + cacheable = cacheable, + read = read, + write = write, + putPartial = putPartial, + logic = logic, + arithmetic = arithmetic, + exec = exec, + sideEffects = sideEffects, + ) + val rowBits: Int = blockBytes * 8 + val instructionFetchParameter: AXI4BundleParameter = AXI4BundleParameter( + idWidth = 1, + dataWidth = rowBits, + addrWidth = paddrBits, + userReqWidth = 0, + userDataWidth = 0, + userRespWidth = 0, + hasAW = false, + hasW = false, + hasB = false, + hasAR = true, + hasR = true, + supportId = true, + supportRegion = false, + supportLen = true, + supportSize = true, + supportBurst = true, + supportLock = false, + supportCache = false, + supportQos = false, + supportStrb = false, + supportResp = false, + supportProt = false, + ) + + def icacheParameter: ICacheParameter = ICacheParameter( + useAsyncReset = useAsyncReset, + prefetch = iCachePrefetch, + nSets = iCacheNSets, + nWays = iCacheNWays, + blockBytes = blockBytes, + usingVM = usingVM, + vaddrBits = vaddrBits, + paddrBits = 
paddrBits + ) + + def tlbParameter: TLBParameter = TLBParameter( + useAsyncReset = useAsyncReset, + xLen = xLen, + nSets = itlbNSets, + nWays = itlbNWays, + nSectors = itlbNSectors, + nSuperpageEntries = itlbNSuperpageEntries, + asidBits = asidBits, + pgLevels = pgLevels, + usingHypervisor = usingHypervisor, + usingAtomics = usingAtomics, + usingDataScratchpad = usingDataScratchpad, + usingAtomicsOnlyForIO = usingAtomicsOnlyForIO, + usingVM = usingVM, + usingAtomicsInCache = usingAtomicsInCache, + nPMPs = nPMPs, + pmaCheckerParameter = pmaCheckerParameter, + paddrBits = paddrBits, + isITLB = true, + ) + def btbParameter: Option[BTBParameter] = Option.when(usingBTB)(BTBParameter( + useAsyncReset = useAsyncReset, + fetchBytes = fetchBytes, + vaddrBits = vaddrBits, + entries = btbEntries, + nMatchBits = btbNMatchBits, + nPages = nPages, + nRAS = nRAS, + cacheBlockBytes = blockBytes, + iCacheSet = iCacheNSets, + useCompressed = usingCompressed, + updatesOutOfOrder = btbUpdatesOutOfOrder, + bhtParameter = bhtParameter, + fetchWidth = fetchWidth + )) + + // entry = 5 + def fetchQueueParameter: FetchQueueParameter = FetchQueueParameter( + // static to be false. + useAsyncReset = false, + entries = 5, + vaddrBits = vaddrBits, + respEntries = entries, + bhtHistoryLength = bhtHistoryLength, + bhtCounterLength = bhtCounterLength, + vaddrBitsExtended = vaddrBitsExtended, + coreInstBits = coreInstBits, + fetchWidth = fetchWidth + ) +} + +class FrontendInterface(parameter: FrontendParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) + val nonDiplomatic = new FrontendBundle( + parameter.vaddrBitsExtended, + parameter.vaddrBits, + parameter.asidBits, + parameter.entries, + parameter.bhtHistoryLength, + parameter.bhtCounterLength, + parameter.coreInstBits, + parameter.nPMPs, + parameter.vpnBits, + parameter.paddrBits, + parameter.pgLevels, + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits, + parameter.hasCorrectable, + parameter.hasUncorrectable, + parameter.fetchWidth + ) + val instructionFetchAXI: AXI4ROIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + val itimAXI: Option[AXI4RWIrrevocable] = + parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) +} + +@instantiable +class Frontend(val parameter: FrontendParameter) + extends FixedIORawModule(new FrontendInterface(parameter)) + with SerializableModule[FrontendParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + def xLen = parameter.xLen + def fetchWidth = parameter.fetchWidth + def fetchBytes = parameter.fetchBytes + def vaddrBitsExtended = parameter.vaddrBitsExtended + def coreInstBits = parameter.coreInstBits + def vaddrBits = parameter.vaddrBits + def entries = parameter.entries + def coreInstBytes = parameter.coreInstBytes + def usingBTB = parameter.usingBTB + def bhtHistoryLength = parameter.bhtHistoryLength + def bhtCounterLength = parameter.bhtCounterLength + def usingCompressed = parameter.usingCompressed + def clock = io.clock + + object rocketParams { + def clockGate = parameter.clockGate + } + + object tileParams { + def btb = parameter.btbParameter + } + + object Instructions { + def BEQ: BitPat = BitPat("b?????????????????000?????1100011") + + def JAL = 
BitPat("b?????????????????????????1101111") + + def JALR = BitPat("b?????????????????000?????1100111") + + def C_BEQZ = BitPat("b????????????????110???????????01") + + def C_BNEZ = BitPat("b????????????????111???????????01") + + def C_J = BitPat("b????????????????101???????????01") + + def C_ADD = BitPat("b????????????????1001??????????10") + + def C_MV = BitPat("b????????????????1000??????????10") + } + + object Instructions32 { + def C_JAL = BitPat("b????????????????001???????????01") + } + + val clock_en_reg: Bool = Reg(Bool()) + val clock_en: Bool = clock_en_reg || io.nonDiplomatic.cpu.might_request + val gated_clock: Clock = + if (!rocketParams.clockGate) clock + else ClockGate(clock, clock_en) + + val icache = Instantiate(new ICache(parameter.icacheParameter)) + icache.io.clock := gated_clock + icache.io.reset := io.reset + icache.io.clock_enabled := clock_en + (icache.io.itimAXI zip io.itimAXI).foreach{ case (frontend, itim) => itim :<>= frontend } + io.instructionFetchAXI :<>= icache.io.instructionFetchAXI + val tlb = Instantiate(new TLB(parameter.tlbParameter)) + tlb.io.clock := gated_clock + tlb.io.reset := io.reset + io.nonDiplomatic.ptw :<>= tlb.io.ptw + io.nonDiplomatic.cpu.clock_enabled := clock_en + val btb = parameter.btbParameter.map(btbParameter => Instantiate(new BTB(btbParameter))) + btb.foreach { btb => + btb.io.clock := io.clock + btb.io.reset := io.reset + btb.io.btb_update := io.nonDiplomatic.cpu.btb_update + btb.io.bht_update := io.nonDiplomatic.cpu.bht_update + } + val fq = Instantiate(new FetchQueue(parameter.fetchQueueParameter)) + fq.io.clock := io.clock + fq.io.reset := io.reset.asBool || io.nonDiplomatic.cpu.req.valid + + assert(!(io.nonDiplomatic.cpu.req.valid || io.nonDiplomatic.cpu.sfence.valid || io.nonDiplomatic.cpu.flush_icache || io.nonDiplomatic.cpu.bht_update.valid || io.nonDiplomatic.cpu.btb_update.valid) || io.nonDiplomatic.cpu.might_request) + + withClock(gated_clock) { // entering gated-clock domain + val s1_valid = Reg(Bool()) + val s2_valid = RegInit(false.B) + val s0_fq_has_space = + !fq.io.mask(fq.io.mask.getWidth - 3) || + (!fq.io.mask(fq.io.mask.getWidth - 2) && (!s1_valid || !s2_valid)) || + (!fq.io.mask(fq.io.mask.getWidth - 1) && (!s1_valid && !s2_valid)) + val s0_valid = io.nonDiplomatic.cpu.req.valid || s0_fq_has_space + s1_valid := s0_valid + val s1_pc = Reg(UInt(vaddrBitsExtended.W)) + val s1_speculative = Reg(Bool()) + // TODO: make it Const + def alignPC(pc: UInt): UInt = ~(~pc | (coreInstBytes - 1).U) + val s2_pc = RegInit(UInt(vaddrBitsExtended.W), alignPC(io.resetVector)) + val s2_btb_resp_valid = if (usingBTB) Reg(Bool()) else false.B + val s2_btb_resp_bits = Reg(new BTBResp(vaddrBits, entries, fetchWidth, bhtHistoryLength, bhtCounterLength)) + val s2_btb_taken = s2_btb_resp_valid && s2_btb_resp_bits.taken + val s2_tlb_resp = Reg(tlb.io.resp.cloneType) + val s2_xcpt = s2_tlb_resp.ae.inst || s2_tlb_resp.pf.inst || s2_tlb_resp.gf.inst + val s2_speculative = RegInit(false.B) + val s2_partial_insn_valid = RegInit(false.B) + val s2_partial_insn = Reg(UInt(coreInstBits.W)) + val wrong_path = RegInit(false.B) + + val s1_base_pc: UInt = ~(~s1_pc | (fetchBytes - 1).U) + val ntpc = s1_base_pc + fetchBytes.U + val predicted_npc = WireDefault(ntpc) + val predicted_taken = WireDefault(false.B) + + val s2_replay = Wire(Bool()) + s2_replay := (s2_valid && !fq.io.enq.fire) || RegNext(s2_replay && !s0_valid, true.B) + val npc = Mux(s2_replay, s2_pc, predicted_npc) + + s1_pc := io.nonDiplomatic.cpu.npc + // consider RVC fetches across blocks to 
be non-speculative if the first + // part was non-speculative + val s0_speculative = + if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken + else true.B + s1_speculative := Mux( + io.nonDiplomatic.cpu.req.valid, + io.nonDiplomatic.cpu.req.bits.speculative, + Mux(s2_replay, s2_speculative, s0_speculative) + ) + + val s2_redirect = WireDefault(io.nonDiplomatic.cpu.req.valid) + s2_valid := false.B + when(!s2_replay) { + s2_valid := !s2_redirect + s2_pc := s1_pc + s2_speculative := s1_speculative + s2_tlb_resp := tlb.io.resp + } + + val recent_progress_counter_init = 3.U + val recent_progress_counter = RegInit(recent_progress_counter_init) + val recent_progress = recent_progress_counter > 0.U + when(io.nonDiplomatic.ptw.req.fire && recent_progress) { recent_progress_counter := recent_progress_counter - 1.U } + when(io.nonDiplomatic.cpu.progress) { recent_progress_counter := recent_progress_counter_init } + + val s2_kill_speculative_tlb_refill = s2_speculative && !recent_progress + + tlb.io.req.valid := s1_valid && !s2_replay + def M_XRD = "b00000".U + tlb.io.req.bits.cmd := M_XRD // Frontend only reads + tlb.io.req.bits.vaddr := s1_pc + tlb.io.req.bits.passthrough := false.B + tlb.io.req.bits.size := log2Ceil(coreInstBytes * fetchWidth).U + tlb.io.req.bits.prv := io.nonDiplomatic.ptw.status.prv + tlb.io.req.bits.v := io.nonDiplomatic.ptw.status.v + tlb.io.sfence := io.nonDiplomatic.cpu.sfence + tlb.io.kill := !s2_valid || s2_kill_speculative_tlb_refill + + icache.io.req.valid := s0_valid + icache.io.req.bits.addr := io.nonDiplomatic.cpu.npc + icache.io.invalidate := io.nonDiplomatic.cpu.flush_icache + icache.io.s1_paddr := tlb.io.resp.paddr + icache.io.s2_vaddr := s2_pc + icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay + val s2_can_speculatively_refill = + s2_tlb_resp.cacheable +// && !io.nonDiplomatic.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableSpeculativeICacheRefill + icache.io.s2_kill := s2_speculative && !s2_can_speculatively_refill || s2_xcpt + icache.io.s2_cacheable := s2_tlb_resp.cacheable + icache.io.s2_prefetch := s2_tlb_resp.prefetchable +// && !io.ptw.customCSRs +// .asInstanceOf[RocketCustomCSRs] +// .disableICachePrefetch + + fq.io.enq.valid := RegNext( + s1_valid + ) && s2_valid && (icache.io.resp.valid || (s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) || (!s2_tlb_resp.miss && icache.io.s2_kill)) + fq.io.enq.bits.pc := s2_pc + io.nonDiplomatic.cpu.npc := alignPC(Mux(io.nonDiplomatic.cpu.req.valid, io.nonDiplomatic.cpu.req.bits.pc, npc)) + + fq.io.enq.bits.data := icache.io.resp.bits.data + fq.io.enq.bits.mask := ((1 << fetchWidth) - 1).U << (if(log2Ceil(fetchWidth) == 0) 0.U else s2_pc(log2Ceil(fetchWidth) + log2Ceil(coreInstBytes) - 1, log2Ceil(coreInstBytes))) + fq.io.enq.bits.replay := (icache.io.resp.bits.replay || icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt) || (s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) + fq.io.enq.bits.btb := s2_btb_resp_bits + fq.io.enq.bits.btb.taken := s2_btb_taken + fq.io.enq.bits.xcpt.ae := s2_tlb_resp.ae.inst + fq.io.enq.bits.xcpt.gf := s2_tlb_resp.gf.inst + fq.io.enq.bits.xcpt.pf := s2_tlb_resp.pf.inst +// assert( +// !(s2_speculative && io.ptw.customCSRs +// .asInstanceOf[RocketCustomCSRs] +// .disableSpeculativeICacheRefill && !icache.io.s2_kill) +// ) + when(icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae := true.B } + + btb.map { btb => + btb.io.flush := false.B + btb.io.req.valid := false.B + btb.io.req.bits.addr := s1_pc + 
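// RAS and BHT update ports default to invalid here; scanInsns below raises
+ // them when it finds a call/return or a conditional branch in the packet. +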
btb.io.ras_update.valid := false.B + btb.io.ras_update.bits := DontCare + btb.io.bht_advance.valid := false.B + btb.io.bht_advance.bits := DontCare + when(!s2_replay) { + btb.io.req.valid := !s2_redirect + s2_btb_resp_valid := btb.io.resp.valid + s2_btb_resp_bits := btb.io.resp.bits + } + when(btb.io.resp.valid && btb.io.resp.bits.taken) { + def sextTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x) + } + + predicted_npc := sextTo(btb.io.resp.bits.target, vaddrBitsExtended) + predicted_taken := true.B + } + +// val force_taken = io.nonDiplomatic.ptw.customCSRs.bpmStatic +// when(io.nonDiplomatic.ptw.customCSRs.flushBTB) { btb.io.flush := true.B } +// when(force_taken) { btb.io.bht_update.valid := false.B } + + val s2_base_pc: UInt = ~(~s2_pc | (fetchBytes - 1).U) + val taken_idx = Wire(UInt()) + val after_idx = Wire(UInt()) + val useRAS = WireDefault(false.B) + val updateBTB = WireDefault(false.B) + + // If !prevTaken, ras_update / bht_update is always invalid. + taken_idx := DontCare + after_idx := DontCare + + def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = { + def insnIsRVC(bits: UInt) = bits(1, 0) =/= 3.U + val prevRVI = prevValid && !insnIsRVC(prevBits) + val valid = fq.io.enq.bits.mask(idx) && !prevRVI + val bits = if (coreInstBits * (idx + 1) == coreInstBits * idx) 0.U else + fq.io.enq.bits.data(coreInstBits * (idx + 1) - 1, coreInstBits * idx) + val rvc = insnIsRVC(bits) + val rviBits = Cat(bits, prevBits) + val rviBranch = rviBits(6, 0) === Instructions.BEQ.value.U(6, 0) + val rviJump = rviBits(6, 0) === Instructions.JAL.value.U(6, 0) + val rviJALR = rviBits(6, 0) === Instructions.JALR.value.U(6, 0) + val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19, 15) + val rviCall = (rviJALR || rviJump) && rviBits(7) + val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ + val rvcJAL = (xLen == 32).B && bits === Instructions32.C_JAL + val rvcJump = bits === Instructions.C_J || rvcJAL + val rvcImm = Mux(bits(14), new RVCDecoder(bits, xLen).bImm.asSInt, new RVCDecoder(bits, xLen).jImm.asSInt) + val rvcJR = bits === Instructions.C_MV && bits(6, 2) === 0.U + val rvcReturn = rvcJR && BitPat("b00?01") === bits(11, 7) + val rvcJALR = bits === Instructions.C_ADD && bits(6, 2) === 0.U + val rvcCall = rvcJAL || rvcJALR + val rviImm = Mux(rviBits(3), ImmGen(ImmGen.IMM_UJ, rviBits), ImmGen(ImmGen.IMM_SB, rviBits)) + val predict_taken = BHTResp.taken(s2_btb_resp_bits.bht) /*|| force_taken*/ + val taken = + prevRVI && (rviJump || rviJALR || rviBranch && predict_taken) || + valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && predict_taken) + val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn) + val predictJump = prevRVI && rviJump || valid && rvcJump + val predictBranch = predict_taken && (prevRVI && rviBranch || valid && rvcBranch) + + when(s2_valid && s2_btb_resp_valid && s2_btb_resp_bits.bridx === idx.U && valid && !rvc) { + // The BTB has predicted that the middle of an RVI instruction is + // a branch! Flush the BTB and the pipeline. 
+ btb.io.flush := true.B + fq.io.enq.bits.replay := true.B + wrong_path := true.B + } + + when(!prevTaken) { + taken_idx := idx.U + after_idx := (idx + 1).U + btb.io.ras_update.valid := fq.io.enq.fire && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn)) + btb.io.ras_update.bits.cfiType := Mux( + Mux(prevRVI, rviReturn, rvcReturn), + CFIType.ret, + Mux( + Mux(prevRVI, rviCall, rvcCall), + CFIType.call, + Mux(Mux(prevRVI, rviBranch, rvcBranch) /* && !force_taken */, CFIType.branch, CFIType.jump) + ) + ) + + when(!s2_btb_taken) { + when(fq.io.enq.fire && taken && !predictBranch && !predictJump && !predictReturn) { + wrong_path := true.B + } + when(s2_valid && predictReturn) { + useRAS := true.B + } + when(s2_valid && (predictBranch || predictJump)) { + val pc: UInt = s2_base_pc | (idx * coreInstBytes).U + val npc = + if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm) + else Mux(prevRVI, pc - coreInstBytes.U, pc).asSInt + Mux(prevRVI, rviImm, rvcImm) + predicted_npc := npc.asUInt + } + } + when(prevRVI && rviBranch || valid && rvcBranch) { + btb.io.bht_advance.valid := fq.io.enq.fire && !wrong_path + btb.io.bht_advance.bits := s2_btb_resp_bits + } + when( + !s2_btb_resp_valid && (predictBranch && BHTResp.strongly_taken( + s2_btb_resp_bits.bht + ) || predictJump || predictReturn) + ) { + updateBTB := true.B + } + } + + if (idx == fetchWidth - 1) { + when(fq.io.enq.fire) { + s2_partial_insn_valid := false.B + when(valid && !prevTaken && !rvc) { + s2_partial_insn_valid := true.B + s2_partial_insn := bits | 0x3.U + } + } + prevTaken || taken + } else { + scanInsns(idx + 1, valid, bits, prevTaken || taken) + } + } + + when(!io.nonDiplomatic.cpu.btb_update.valid) { + val fetch_bubble_likely = !fq.io.mask(1) + btb.io.btb_update.valid := fq.io.enq.fire && !wrong_path && fetch_bubble_likely && updateBTB + btb.io.btb_update.bits.prediction.entry := tileParams.btb.get.nEntries.U + btb.io.btb_update.bits.isValid := true.B + btb.io.btb_update.bits.cfiType := btb.io.ras_update.bits.cfiType + btb.io.btb_update.bits.br_pc := s2_base_pc | (taken_idx << log2Ceil(coreInstBytes)) + btb.io.btb_update.bits.pc := s2_base_pc + } + + btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes)) + + val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B) + when(useRAS) { + predicted_npc := btb.io.ras_head.bits + } + when(fq.io.enq.fire && (s2_btb_taken || taken)) { + s2_partial_insn_valid := false.B + } + when(!s2_btb_taken) { + when(taken) { + fq.io.enq.bits.btb.bridx := taken_idx + fq.io.enq.bits.btb.taken := true.B + fq.io.enq.bits.btb.entry := tileParams.btb.get.nEntries.U + when(fq.io.enq.fire) { s2_redirect := true.B } + } + } + + assert(!s2_partial_insn_valid || fq.io.enq.bits.mask(0)) + when(s2_redirect) { s2_partial_insn_valid := false.B } + when(io.nonDiplomatic.cpu.req.valid) { wrong_path := false.B } + } + + io.nonDiplomatic.cpu.resp <> fq.io.deq + + // supply guest physical address to commit stage + val gpa_valid = Reg(Bool()) + val gpa = Reg(UInt(vaddrBitsExtended.W)) + when(fq.io.enq.fire && s2_tlb_resp.gf.inst) { + when(!gpa_valid) { + gpa := s2_tlb_resp.gpa + } + gpa_valid := true.B + } + when(io.nonDiplomatic.cpu.req.valid) { + gpa_valid := false.B + } + io.nonDiplomatic.cpu.gpa.valid := gpa_valid + io.nonDiplomatic.cpu.gpa.bits := gpa + + // performance events + io.nonDiplomatic.cpu.perf.acquire := icache.io.perf.acquire + io.nonDiplomatic.cpu.perf.tlbMiss := io.nonDiplomatic.ptw.req.fire + 
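// tlbMiss is counted at the PTW request handshake: within the frontend this
+ // PTW port is driven only by the ITLB, so each fire is an ITLB refill. +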
io.nonDiplomatic.errors := icache.io.errors + + // gate the clock + clock_en_reg := !rocketParams.clockGate.B || + io.nonDiplomatic.cpu.might_request || // chicken bit + icache.io.keep_clock_enabled || // I$ miss or ITIM access + s1_valid || s2_valid || // some fetch in flight + !tlb.io.req.ready || // handling TLB miss + !fq.io.mask(fq.io.mask.getWidth - 1) // queue not full + } // leaving gated-clock domain +} diff --git a/rocketv/src/HellaCache.scala b/rocketv/src/HellaCache.scala new file mode 100644 index 000000000..5574a0940 --- /dev/null +++ b/rocketv/src/HellaCache.scala @@ -0,0 +1,1682 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter, SourceInfo} +import chisel3.util.experimental.{BitSet, InlineInstance} +import chisel3.util.{Arbiter, BitPat, Cat, Enum, Fill, FillInterleaved, Mux1H, MuxLookup, OHToUInt, PriorityEncoder, PriorityEncoderOH, PriorityMux, Queue, RegEnable, SRAM, SRAMInterface, UIntToOH, isPow2, log2Ceil} +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ChiselBundle, AXI4ROIrrevocable, AXI4RWIrrevocable, R, W} + +object HellaCacheParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit def rwP: upickle.default.ReadWriter[HellaCacheParameter] = upickle.default.macroRW[HellaCacheParameter] +} + +case class HellaCacheParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen: Int, + usingVM: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + nWays: Int, + nSets: Int, + rowBits: Int, + nTLBSets: Int, + nTLBWays: Int, + tagECC: Option[String], + dataECC: Option[String], + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) extends SerializableModuleParameter { + + def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) (if (usingHypervisor) 1 else 0) + 1 else 0) + + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + + def maxHypervisorExtraAddrBits: Int = 2 + + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 + + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1) min xLen + } + // static for now + def dcacheReqTagBits: Int = 6 + + def usingHypervisor = false + + def scratch: Option[BigInt] = None + + def acquireBeforeRelease: Boolean = false + + def replacementPolicy: String = "random" //lfsr + + def usingAtomics: Boolean = true + + def useAtomicsOnlyForIO: Boolean = false + + def flushOnFenceI: Boolean = true + + def useVector: Boolean = false + + def haveCFlush: 
Boolean = false + + def subWordBits: Option[Int] = None + + // calculated + def pgIdxBits: Int = 12 + + def lgCacheBlockBytes: Int = log2Ceil(cacheBlockBytes) + + def blockOffBits: Int = lgCacheBlockBytes + + def idxBits: Int = log2Ceil(nSets) + + def untagBits: Int = blockOffBits + idxBits + + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + + def usingDataScratchpad: Boolean = scratch.isDefined + + def dcacheArbPorts: Int = 1 + (if (usingVM) 1 else 0) + (if (usingDataScratchpad) 1 else 0) + + def tagCode: Code = Code.fromString(tagECC) + + def dataCode: Code = Code.fromString(dataECC) + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def pipelineWayMux: Boolean = false + + def nPMPs: Int = 8 + + def vpnBits: Int = vaddrBits - pgIdxBits + + def hasCorrectable: Boolean = tagCode.canCorrect || dataCode.canCorrect + + def hasUncorrectable: Boolean = tagCode.canDetect || dataCode.canDetect + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + /* Sv32 */ + val maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } + + def coreDataBits: Int = xLen max fLen + + def coreDataBytes: Int = coreDataBits / 8 + + def silentDrop: Boolean = !acquireBeforeRelease + + def idxMSB: Int = untagBits - 1 + + def idxLSB: Int = blockOffBits + + def wordBits: Int = coreDataBits + + def rowWords: Int = rowBits / wordBits + + def wordBytes: Int = coreDataBytes + + def wordOffBits: Int = log2Ceil(wordBytes) + + def cacheDataBits: Int = rowBits + + def cacheDataBeats: Int = (cacheBlockBytes * 8) / cacheDataBits + + def beatBytes: Int = cacheBlockBytes / cacheDataBeats + + def beatWords: Int = beatBytes / wordBytes + + def dataECCBytes: Int = 1 + + def eccBits: Int = dataECCBytes * 8 + + def eccBytes: Int = dataECCBytes + + def encBits: Int = dataCode.width(eccBits) + + def rowBytes: Int = rowBits / 8 + + def subWordBytes: Int = subWordBits.getOrElse(wordBits) / 8 + + def rowOffBits: Int = log2Ceil(rowBytes) + + def beatOffBits: Int = log2Ceil(beatBytes) + + def usingAtomicsInCache: Boolean = usingAtomics && !useAtomicsOnlyForIO + + def pgUntagBits: Int = if (usingVM) untagBits min pgIdxBits else untagBits + + def tagBits: Int = paddrBits - pgUntagBits + + // todo: max axi id + def firstMMIO: Int = 4 + + def lrscBackoff: Int = 3 + + def lrscCycles: Int = 80 // worst case is 14 mispredicted branches + slop + + def pmaCheckerParameter: PMACheckerParameter = PMACheckerParameter( + paddrBits, + legal, + cacheable, + read, + write, + putPartial, + logic, + arithmetic, + exec, + sideEffects) + + def tlbParameter: TLBParameter = TLBParameter( + useAsyncReset, + xLen, + nTLBSets, + nTLBWays, + nSectors = 4, + nSuperpageEntries = 4, + asidBits = 0, + pgLevels, + usingHypervisor = false, + usingAtomics, + usingDataScratchpad, + useAtomicsOnlyForIO, + usingVM, + usingAtomicsInCache, + nPMPs, + pmaCheckerParameter, + paddrBits, + isITLB = false + ) + + def amoaluParameter: Option[AMOALUParameter] = Option.when(eccBytes > 1 || usingAtomicsInCache)(AMOALUParameter(xLen)) + + def dtimParameter: Option[AXI4BundleParameter] = scratch.map { _ => + AXI4BundleParameter( + idWidth = 1, + dataWidth = rowBits, + addrWidth = paddrBits, + userReqWidth = 0, + userDataWidth = 0, + userRespWidth = 0, + hasAW = true, + hasW = true, + hasB = true, + hasAR = true, + hasR = true, + supportId = true, + supportRegion = false, + supportLen = true, + supportSize = true, + supportBurst = true, + supportLock = false, + supportCache = false, + supportQos = false, + supportStrb = false, + supportResp = 
false, + supportProt = false, + ) + } + + def loadStoreParameter: AXI4BundleParameter = AXI4BundleParameter( + idWidth = 1, + dataWidth = rowBits, + addrWidth = paddrBits, + userReqWidth = 1, + userDataWidth = 0, + userRespWidth = 1, + hasAW = true, + hasW = true, + hasB = true, + hasAR = true, + hasR = true, + supportId = true, + supportRegion = false, + supportLen = true, + supportSize = true, + supportBurst = true, + supportLock = false, + supportCache = false, + supportQos = false, + supportStrb = false, + supportResp = false, + supportProt = false, + ) +} + +class HellaCacheInterface(parameter: HellaCacheParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val cpu = Flipped( + new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ) + ) + val ptw = new TLBPTWIO( + parameter.nPMPs, + parameter.vpnBits, + parameter.paddrBits, + parameter.vaddrBits, + parameter.pgLevels, + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits + ) + val errors = new DCacheErrors(parameter.hasCorrectable, parameter.hasUncorrectable, parameter.paddrBits) + val loadStoreAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) + val dtimAXI: Option[AXI4RWIrrevocable] = parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) +} + +@instantiable +class HellaCache(val parameter: HellaCacheParameter) + extends FixedIORawModule(new HellaCacheInterface(parameter)) + with SerializableModule[HellaCacheParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + // instantiate sub hierarchies + val tlb: Instance[TLB] = Instantiate(new TLB(parameter.tlbParameter)) + val pmaChecker: Instance[PMAChecker] = Instantiate(new PMAChecker(parameter.pmaCheckerParameter)) + val amoalus: Option[Seq[Instance[AMOALU]]] = parameter.amoaluParameter.map(amoaluParameter=>Seq.tabulate(parameter.coreDataBits / parameter.xLen)(i => Instantiate(new AMOALU(amoaluParameter)))) + + tlb.io.clock := io.clock + tlb.io.reset := io.reset + + // compatibility layers + object cacheParams { + def tagCode: Code = parameter.tagCode + def dataCode: Code = parameter.dataCode + def silentDrop: Boolean = parameter.silentDrop + def acquireBeforeRelease: Boolean = parameter.acquireBeforeRelease + def clockGate: Boolean = parameter.clockGate + def replacementPolicy: String = parameter.replacementPolicy + def separateUncachedResp: Boolean = parameter.separateUncachedResp + def pipelineWayMux: Boolean = parameter.pipelineWayMux + } + def rowWords: Int = parameter.rowWords + def wordOffBits: Int = parameter.wordOffBits + def beatWords: Int = parameter.beatWords + def beatBytes: Int = parameter.beatBytes + def idxMSB: Int = parameter.idxMSB + def idxLSB: Int = parameter.idxLSB + def subWordBits: Int = parameter.subWordBits.getOrElse(parameter.wordBits) + def eccBits: Int = parameter.eccBits + def eccBytes: Int = parameter.eccBytes + def coreMaxAddrBits: Int = parameter.coreMaxAddrBits + def usingVM: Boolean = parameter.usingVM + def pgIdxBits: Int = parameter.pgIdxBits + def pgLevelBits: Int = parameter.pgLevelBits + def dcacheReqTagBits: Int = 
parameter.dcacheReqTagBits + def dcacheArbPorts: Int = parameter.dcacheArbPorts + def coreDataBytes: Int = parameter.coreDataBytes + def encBits: Int = parameter.encBits + def untagBits: Int = parameter.untagBits + def rowBytes: Int = parameter.rowBytes + def subWordBytes: Int = parameter.subWordBytes + def rowOffBits: Int = parameter.rowOffBits + def beatOffBits: Int = parameter.beatOffBits + def wordBytes: Int = parameter.wordBytes + def usingAtomicsInCache: Boolean = parameter.usingAtomicsInCache + def nWays: Int = parameter.nWays + def nSets: Int = parameter.nSets + def cacheBlockBytes: Int = parameter.cacheBlockBytes + def vaddrBitsExtended: Int = parameter.vaddrBitsExtended + def paddrBits: Int = parameter.paddrBits + def maxUncachedInFlight: Int = parameter.maxUncachedInFlight + def tagBits: Int = parameter.tagBits + def idxBits: Int = parameter.idxBits + def blockOffBits: Int = parameter.blockOffBits + def usingDataScratchpad: Boolean = parameter.usingDataScratchpad + def usingAtomics: Boolean = parameter.usingAtomics + def lrscBackoff: Int = parameter.lrscBackoff + def lrscCycles: Int = parameter.lrscCycles + def rowBits: Int = parameter.rowBits + def cacheDataBits: Int = parameter.rowBits + def cacheDataBytes: Int = cacheDataBits / 8 + def cacheDataBeats: Int = (cacheBlockBytes * 8) / cacheDataBits + def refillCycles: Int = cacheDataBeats + def blockProbeAfterGrantCycles: Int = 8 + def wordBits: Int = parameter.coreDataBits + object outer { + def firstMMIO = parameter.firstMMIO + def flushOnFenceI = parameter.flushOnFenceI + } + object coreParams { + def useVector = parameter.useVector + def haveCFlush = parameter.haveCFlush + } + object ClientMetadata { + def isValid(cm: ClientMetadata): Bool = cm.state > 0.U + + def apply(perm: UInt): ClientMetadata = { + val meta = Wire(new ClientMetadata) + meta.state := perm + meta + } + } + object L1Metadata { + def apply(tag: Bits, coh: ClientMetadata) = { + val meta = Wire(new L1Metadata(parameter.tagBits)) + meta.tag := tag + meta.coh := coh + meta + } + } + def M_SFENCE = "b10100".U // SFENCE.VMA + def M_HFENCEV = "b10101".U // HFENCE.VVMA + def M_HFENCEG = "b10110".U // HFENCE.GVMA + def M_FLUSH_ALL = "b00101".U // flush all lines + def M_WOK = "b10111".U // check write permissions but don't perform a write + def M_PWR = "b10001".U // partial (masked) store + def M_XLR = "b00110".U + def M_XSC = "b00111".U + def M_XWR = "b00001".U; // int store + def M_XRD = "b00000".U; // int load + def M_PFW = "b00011".U; // prefetch with intent to write + + // todo + def grouped(x: UInt, width: Int): Seq[UInt] = + (0 until x.getWidth by width).map(base => x(base + width - 1, base)) + def grouped[T <: Data](x: Vec[T], width: Int): Seq[Vec[T]] = + (0 until x.size by width).map(base => + VecInit(Seq.tabulate(width){i => x(base + i)}) + ) + + val clock = io.clock + val reset = io.reset + val pma_checker = pmaChecker + + val tECC = cacheParams.tagCode + val dECC = cacheParams.dataCode + require(subWordBits % eccBits == 0, "subWordBits must be a multiple of eccBits") + require(eccBytes == 1 || !dECC.isInstanceOf[IdentityCode]) + require(cacheParams.silentDrop || cacheParams.acquireBeforeRelease, "!silentDrop requires acquireBeforeRelease") + val usingRMW = eccBytes > 1 || usingAtomicsInCache + val mmioOffset = outer.firstMMIO + // edge.manager.requireFifo(TLFIFOFixer.allVolatile) // TileLink pipelining MMIO requests + + val clock_en_reg = Reg(Bool()) + io.cpu.clock_enabled := clock_en_reg + + val gated_clock = + if (!cacheParams.clockGate) clock + else 
chisel3.util.circt.ClockGate(clock, clock_en_reg) + class DCacheModuleImpl { // entering gated-clock domain + // tags + val replacer = ReplacementPolicy.fromString(cacheParams.replacementPolicy, nWays) + + /** Metadata Arbiter: + * 0: Tag update on reset + * 1: Tag update on ECC error + * 2: Tag update on hit + * 3: Tag update on refill + * 4: Tag update on release + * 5: Tag update on flush + * 6: Tag update on probe + * 7: Tag update on CPU request + */ + + val metaArb = Module(new Arbiter(new DCacheMetadataReq(vaddrBitsExtended, idxBits, nWays, cacheParams.tagCode.width(new L1Metadata(tagBits).getWidth)), 8) with InlineInstance) + // todo: delete + metaArb.io.in(1).valid := false.B + metaArb.io.in(1).bits := DontCare + + val tag_array: SRAMInterface[Vec[UInt]] = SRAM.masked( + size = nSets, + tpe = Vec(nWays, chiselTypeOf(metaArb.io.out.bits.data)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 + ) + + // data + // val data = Module(new DCacheDataArray) + // the DCacheDataArray module was inlined here for a better physical-design (PD) flow + // Vec(nWays, req.bits.wdata) + val dataArrays = Seq.tabulate(rowBits / subWordBits) { i => SRAM.masked( + size = nSets * cacheBlockBytes / rowBytes, + tpe = Vec(nWays * (subWordBits / eccBits), UInt(encBits.W)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 + )} + + /** Data Arbiter + * 0: data from pending store buffer + * 1: data from TL-D refill + * 2: release to TL-A + * 3: hit path to CPU + */ + val dataArb = Module(new Arbiter(new DCacheDataReq(untagBits, encBits, rowBytes, eccBytes, subWordBytes, wordBytes, nWays), 4) with InlineInstance) + + dataArb.io.in.tail.foreach(_.bits.wdata := dataArb.io.in.head.bits.wdata) // tie off write ports by default + dataArb.io.out.ready := true.B + metaArb.io.out.ready := clock_en_reg + + val readData: Seq[Seq[UInt]] = dataArrays.zipWithIndex.map { case (array, i) => + val valid = dataArb.io.out.valid && ((dataArrays.size == 1).B || dataArb.io.out.bits.wordMask(i)) + val dataEccMask = if (eccBits == subWordBits) Seq(true.B) else dataArb.io.out.bits.eccMask.asBools + val wMask = if (nWays == 1) dataEccMask else (0 until nWays).flatMap(i => dataEccMask.map(_ && dataArb.io.out.bits.way_en(i))) + val wWords = grouped(dataArb.io.out.bits.wdata, encBits * (subWordBits / eccBits)) + val addr = (dataArb.io.out.bits.addr >> rowOffBits).asUInt + val wData = VecInit(grouped(wWords(i), encBits)) + val wMaskSlice: Seq[Bool] = (0 until wMask.size) + .filter(j => i % (wordBytes * 8 / subWordBits) == (j % (wordBytes / eccBytes)) / (subWordBytes / eccBytes)) + .map(wMask(_)) + array.readwritePorts.foreach { arrayPort => + arrayPort.enable := valid + arrayPort.isWrite := dataArb.io.out.bits.write + arrayPort.address := addr + arrayPort.writeData := VecInit((0 until nWays).flatMap(i => wData)) + arrayPort.mask.foreach(_ := VecInit(wMaskSlice)) + } + val data: Vec[UInt] = array.readwritePorts.head.readData + // data.grouped(subWordBits / eccBits).map(_.asUInt).toSeq + grouped(data, subWordBits / eccBits).map(_.asUInt) + } + // (io.resp.zip(rdata.transpose)).foreach { case (resp, data) => resp := data.asUInt } + val rdata = readData.transpose.map(ds => VecInit(ds).asUInt) + + val release_queue_empty = Wire(Bool()) + + val s1_valid = RegNext(io.cpu.req.fire, false.B) + val releaseAddress = RegInit(0.U(parameter.paddrBits.W)) + val s1_nack = WireDefault(false.B) + val s1_valid_masked = s1_valid && !io.cpu.s1_kill + val s1_valid_not_nacked = s1_valid && !s1_nack + val s0_clk_en = metaArb.io.out.valid && 
!metaArb.io.out.bits.write + + val s0_req = WireInit(io.cpu.req.bits) + s0_req.addr := Cat(metaArb.io.out.bits.addr >> blockOffBits, io.cpu.req.bits.addr(blockOffBits - 1, 0)) + s0_req.idx.foreach(_ := Cat(metaArb.io.out.bits.idx, s0_req.addr(blockOffBits - 1, 0))) + when(!metaArb.io.in(7).ready) { s0_req.phys := true.B } + val s1_req = RegEnable(s0_req, s0_clk_en) + val s1_vaddr = Cat(s1_req.idx.getOrElse(s1_req.addr) >> tagLSB, s1_req.addr(tagLSB - 1, 0)) + + val s0_tlb_req: TLBReq = Wire(new TLBReq(paddrBits, vaddrBitsExtended)) + s0_tlb_req.passthrough := s0_req.phys + s0_tlb_req.vaddr := s0_req.addr + s0_tlb_req.size := s0_req.size + s0_tlb_req.cmd := s0_req.cmd + s0_tlb_req.prv := s0_req.dprv + s0_tlb_req.v := s0_req.dv + val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en) + + val s1_read = isRead(s1_req.cmd) + val s1_write = isWrite(s1_req.cmd) + val s1_readwrite = s1_read || s1_write + val s1_sfence = s1_req.cmd === M_SFENCE || s1_req.cmd === M_HFENCEV || s1_req.cmd === M_HFENCEG + val s1_flush_line = s1_req.cmd === M_FLUSH_ALL && s1_req.size(0) + val s1_flush_valid = Reg(Bool()) + val s1_waw_hazard = Wire(Bool()) + + val s_ready :: s_voluntary_writeback :: s_voluntary_write_meta :: s_voluntary_aw :: Nil = Enum(4) + val supports_flush = outer.flushOnFenceI || coreParams.haveCFlush + val flushed = RegInit(true.B) + val flushing = RegInit(false.B) + val flushing_req = Reg(chiselTypeOf(s1_req)) + val cached_grant_wait = RegInit(false.B) + val resetting = RegInit(false.B) + val flushCounter = RegInit((nSets * (nWays - 1)).U(log2Ceil(nSets * nWays).W)) + val release_ack_wait = RegInit(false.B) + val release_ack_addr = Reg(UInt(paddrBits.W)) + val release_state = RegInit(s_ready) + val refill_way = Reg(UInt()) + val any_pstore_valid = Wire(Bool()) + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + + val inWriteback = release_state === s_voluntary_writeback + val awState = release_state === s_voluntary_aw + val releaseWay = Wire(UInt()) + io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack + release_queue_empty := release_state =/= s_voluntary_writeback + + // I/O MSHRs + val uncachedInFlight = RegInit(VecInit(Seq.fill(maxUncachedInFlight)(false.B))) + val uncachedReqs = Reg(Vec(maxUncachedInFlight, new HellaCacheReq( + coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes))) + val uncachedResp = WireInit(new HellaCacheReq( + coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes + ), DontCare) + + // hit initiation path + val s0_read = isRead(io.cpu.req.bits.cmd) + dataArb.io.in(3).valid := io.cpu.req.valid && likelyNeedsRead(io.cpu.req.bits) + dataArb.io.in(3).bits := dataArb.io.in(1).bits + dataArb.io.in(3).bits.write := false.B + dataArb.io.in(3).bits.addr := Cat( + io.cpu.req.bits.idx.getOrElse(io.cpu.req.bits.addr) >> tagLSB, + io.cpu.req.bits.addr(tagLSB - 1, 0) + ) + dataArb.io.in(3).bits.wordMask := { + val mask = (log2Ceil(subWordBytes) until rowOffBits).foldLeft(1.U) { + case (in, i) => + val upper_mask = Mux( + (i >= log2Ceil(wordBytes)).B || io.cpu.req.bits.size <= i.U, + 0.U, + ((BigInt(1) << (1 << (i - log2Ceil(subWordBytes)))) - 1).U + ) + val upper = Mux(io.cpu.req.bits.addr(i), in, 0.U) | upper_mask + val lower = Mux(io.cpu.req.bits.addr(i), 0.U, in) + upper ## lower + } + Fill(subWordBytes / eccBytes, mask) + } + dataArb.io.in(3).bits.eccMask := ~0.U((wordBytes / eccBytes).W) + dataArb.io.in(3).bits.way_en 
:= ~0.U(nWays.W) + when(!dataArb.io.in(3).ready && s0_read) { io.cpu.req.ready := false.B } + val s1_did_read = RegEnable(dataArb.io.in(3).ready && (io.cpu.req.valid && needsRead(io.cpu.req.bits)), s0_clk_en) + val s1_read_mask = RegEnable(dataArb.io.in(3).bits.wordMask, s0_clk_en) + metaArb.io.in(7).valid := io.cpu.req.valid + metaArb.io.in(7).bits.write := false.B + metaArb.io.in(7).bits.idx := dataArb.io.in(3).bits.addr(idxMSB, idxLSB) + metaArb.io.in(7).bits.addr := io.cpu.req.bits.addr + metaArb.io.in(7).bits.way_en := metaArb.io.in(4).bits.way_en + metaArb.io.in(7).bits.data := metaArb.io.in(4).bits.data + when(!metaArb.io.in(7).ready) { io.cpu.req.ready := false.B } + + // address translation + val s1_cmd_uses_tlb = s1_readwrite || s1_flush_line || s1_req.cmd === M_WOK + io.ptw <> tlb.io.ptw + tlb.io.kill := io.cpu.s2_kill + tlb.io.req.valid := s1_valid && !io.cpu.s1_kill && s1_cmd_uses_tlb + tlb.io.req.bits := s1_tlb_req + when(!tlb.io.req.ready && !tlb.io.ptw.resp.valid && !io.cpu.req.bits.phys) { io.cpu.req.ready := false.B } + when(s1_valid && s1_cmd_uses_tlb && tlb.io.resp.miss) { s1_nack := true.B } + + tlb.io.sfence.valid := s1_valid && !io.cpu.s1_kill && s1_sfence + tlb.io.sfence.bits.rs1 := s1_req.size(0) + tlb.io.sfence.bits.rs2 := s1_req.size(1) + tlb.io.sfence.bits.asid := io.cpu.s1_data.data + tlb.io.sfence.bits.addr := s1_req.addr + tlb.io.sfence.bits.hv := s1_req.cmd === M_HFENCEV + tlb.io.sfence.bits.hg := s1_req.cmd === M_HFENCEG + + val s1_paddr = Cat(tlb.io.resp.paddr >> pgIdxBits, s1_req.addr(pgIdxBits - 1, 0)) + + // pma_checker.io.req.bits.passthrough := true.B + // pma_checker.io.req.bits.vaddr := s1_req.addr + // pma_checker.io.req.bits.size := s1_req.size + // pma_checker.io.req.bits.cmd := s1_req.cmd + // pma_checker.io.req.bits.prv := s1_req.dprv + // pma_checker.io.req.bits.v := s1_req.dv + // todo: uncertain + pma_checker.io.paddr := s1_paddr + val s1_victim_way = Wire(UInt()) + val (s1_hit_way, s1_hit_state, s1_meta) = + if (usingDataScratchpad) { + val baseAddr: UInt = parameter.scratch.getOrElse(BigInt(0)).U + val inScratchpad = s1_paddr >= baseAddr && s1_paddr < baseAddr + (nSets * cacheBlockBytes).U + val hitState = Mux(inScratchpad, ClientMetadata(3.U), ClientMetadata(0.U)) + val dummyMeta = L1Metadata(0.U, ClientMetadata(0.U)) + (inScratchpad, hitState, Seq(tECC.encode(dummyMeta.asUInt))) + } else { + val metaReq = metaArb.io.out + val metaIdx = metaReq.bits.idx + val wmask = if (nWays == 1) Seq(true.B) else metaReq.bits.way_en.asBools + tag_array.readwritePorts.foreach { tagPort => + tagPort.enable := metaReq.valid + tagPort.isWrite := metaReq.bits.write + tagPort.address := metaIdx + tagPort.writeData := VecInit(Seq.fill(nWays)(metaReq.bits.data)) + tagPort.mask.foreach(_ := VecInit(wmask)) + } + val s1_meta: Seq[UInt] = tag_array.readwritePorts.head.readData + val s1_meta_uncorrected: Seq[L1Metadata] = s1_meta.map(tECC.decode(_).uncorrected.asTypeOf(new L1Metadata(tagBits))) + val s1_tag: UInt = s1_paddr >> tagLSB + val s1_meta_hit_way = VecInit(s1_meta_uncorrected.map(r => ClientMetadata.isValid(r.coh) && r.tag === s1_tag)).asUInt + val s1_meta_hit_state = (s1_meta_uncorrected + .map(r => Mux(r.tag === s1_tag && !s1_flush_valid, r.coh.asUInt, 0.U)) + .reduce(_ | _)) + .asTypeOf(chiselTypeOf(ClientMetadata(0.U))) + (s1_meta_hit_way, s1_meta_hit_state, s1_meta) + } + val s1_data_way = WireDefault(if (nWays == 1) 1.U else Mux(inWriteback, releaseWay, s1_hit_way)) +// val tl_d_data_encoded = Wire(chiselTypeOf(encodeData(tl_out.d.bits.data, 
false.B))) + val tl_d_data_encoded = Wire(chiselTypeOf(encodeData(io.loadStoreAXI.r.bits.data, false.B))) +// val s1_all_data_ways = VecInit(data.io.resp ++ (!cacheParams.separateUncachedResp).option(tl_d_data_encoded)) + val s1_all_data_ways: Vec[UInt] = VecInit(rdata ++ Option.when(!cacheParams.separateUncachedResp)(tl_d_data_encoded)) + val s1_mask_xwr = new StoreGen(s1_req.size, s1_req.addr, 0.U, wordBytes).mask + val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, s1_mask_xwr) + // for partial writes, s1_data.mask must be a subset of s1_mask_xwr + assert(!(s1_valid_masked && s1_req.cmd === M_PWR) || (s1_mask_xwr | ~io.cpu.s1_data.mask).andR) + + val s2_valid = RegNext(s1_valid_masked && !s1_sfence, init = false.B) + val s2_valid_no_xcpt = s2_valid && !io.cpu.s2_xcpt.asUInt.orR + val releaseInFlight = release_state =/= s_ready + val s2_not_nacked_in_s1 = RegNext(!s1_nack) + val s2_valid_not_nacked_in_s1 = s2_valid && s2_not_nacked_in_s1 + val s2_valid_masked = s2_valid_no_xcpt && s2_not_nacked_in_s1 + val s2_valid_not_killed = s2_valid_masked && !io.cpu.s2_kill + val s2_req = Reg(chiselTypeOf(io.cpu.req.bits)) + val s2_cmd_flush_all = s2_req.cmd === M_FLUSH_ALL && !s2_req.size(0) + val s2_cmd_flush_line = s2_req.cmd === M_FLUSH_ALL && s2_req.size(0) + val s2_tlb_xcpt = Reg(chiselTypeOf(tlb.io.resp)) + val s2_pma = Reg(chiselTypeOf(tlb.io.resp)) + val s2_uncached_resp_addr = Reg(chiselTypeOf(s2_req.addr)) // should be DCE'd in synthesis + when(s1_valid_not_nacked || s1_flush_valid) { + s2_req := s1_req + s2_req.addr := s1_paddr + s2_tlb_xcpt := tlb.io.resp + s2_pma := tlb.io.resp + } + val s2_vaddr = Cat(RegEnable(s1_vaddr, s1_valid_not_nacked || s1_flush_valid) >> tagLSB, s2_req.addr(tagLSB - 1, 0)) + val s2_read = isRead(s2_req.cmd) + val s2_write = isWrite(s2_req.cmd) + val s2_readwrite = s2_read || s2_write + val s2_flush_valid_pre_tag_ecc = RegNext(s1_flush_valid) + val s1_meta_decoded = s1_meta.map(tECC.decode(_)) + val s1_meta_clk_en = s1_valid_not_nacked || s1_flush_valid + val s2_meta_correctable_errors = VecInit(s1_meta_decoded.map(m => RegEnable(m.correctable, s1_meta_clk_en))).asUInt + val s2_meta_uncorrectable_errors = VecInit(s1_meta_decoded.map(m => RegEnable(m.uncorrectable, s1_meta_clk_en))).asUInt + val s2_meta_error_uncorrectable = s2_meta_uncorrectable_errors.orR + val s2_meta_corrected = s1_meta_decoded.map(m => RegEnable(m.corrected, s1_meta_clk_en).asTypeOf(new L1Metadata(tagBits))) + val s2_meta_error = (s2_meta_uncorrectable_errors | s2_meta_correctable_errors).orR + val s2_flush_valid = s2_flush_valid_pre_tag_ecc && !s2_meta_error + val s2_data = { + val wordsPerRow = rowBits / subWordBits + val en = s1_valid || inWriteback || io.cpu.replay_next + val word_en = Mux(inWriteback, Fill(wordsPerRow, 1.U), Mux(s1_did_read, s1_read_mask, 0.U)) + val s1_way_words = s1_all_data_ways.map(grouped(_, dECC.width(eccBits) * (subWordBits / eccBits))) + if (cacheParams.pipelineWayMux) { + val s1_word_en = Mux(io.cpu.replay_next, 0.U, word_en) + VecInit(for (i <- 0 until wordsPerRow) yield { + val s2_way_en = RegEnable(Mux(s1_word_en(i), s1_data_way, 0.U), en) + val s2_way_words = (0 until nWays).map(j => RegEnable(s1_way_words(j)(i), en && word_en(i))) + (0 until nWays).map(j => Mux(s2_way_en(j), s2_way_words(j), 0.U)).reduce(_ | _) + }).asUInt + } else { + val s1_word_en = Mux( + !io.cpu.replay_next, + word_en, + UIntToOH(if (log2Ceil(rowBits / 8) == log2Ceil(wordBytes)) 0.U else uncachedResp.addr(log2Ceil(rowBits / 8) - 1, log2Ceil(wordBytes)), wordsPerRow) + ) + 
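+ // The VecInit below assembles s2_data word-by-word: for each row word i,
+ // Mux1H selects across the data ways (s1_data_way is one-hot, with one
+ // extra high bit that selects the uncached-response data on replay), and
+ // the selected word is registered into s2. A minimal sketch of the
+ // per-word shape, with hypothetical names:
+ //   val s2Word = RegEnable(Mux1H(wayOneHot, perWayWords), wordEnable)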
VecInit(for (i <- 0 until wordsPerRow) yield { + RegEnable(Mux1H(Mux(s1_word_en(i), s1_data_way, 0.U), s1_way_words.map(_(i))), en) + }).asUInt + } + } + val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked) + val s2_hit_state: ClientMetadata = RegEnable(s1_hit_state, s1_valid_not_nacked || s1_flush_valid) + val s2_waw_hazard = RegEnable(s1_waw_hazard, s1_valid_not_nacked) + val s2_store_merge = Wire(Bool()) +// val s2_hit_valid = s2_hit_state.isValid() + val s2_hit_valid = s2_hit_state.state > 0.U + // no probes are supported, so the only reachable coherence states are Dirty (3), Trunk (2), and Nothing (0) + // val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) + val s2_hit = s2_hit_valid + val nextState = Mux(s2_hit_state.state === 3.U || isWrite(s2_req.cmd), 3.U, 2.U) + val s2_new_hit_state: ClientMetadata = ClientMetadata(nextState) + val s2_data_decoded = decodeData(s2_data) + val s2_data_error = VecInit(s2_data_decoded.map(_.error)).asUInt.orR + val s2_data_error_uncorrectable = VecInit(s2_data_decoded.map(_.uncorrectable)).asUInt.orR + val s2_data_corrected = VecInit(s2_data_decoded.map(_.corrected): Seq[UInt]).asUInt + val s2_data_uncorrected = VecInit(s2_data_decoded.map(_.uncorrected): Seq[UInt]).asUInt + val s2_valid_hit_maybe_flush_pre_data_ecc_and_waw = s2_valid_masked && !s2_meta_error && s2_hit + val s2_no_alloc_hazard = + if (!usingVM || pgIdxBits >= untagBits) false.B + else { + // make sure that any in-flight non-allocating accesses are ordered before + // any allocating accesses. this can only happen if aliasing is possible. + val any_no_alloc_in_flight = Reg(Bool()) + when(!uncachedInFlight.asUInt.orR) { any_no_alloc_in_flight := false.B } + when(s2_valid && s2_req.no_alloc) { any_no_alloc_in_flight := true.B } + val s1_need_check = any_no_alloc_in_flight || s2_valid && s2_req.no_alloc + + val concerns = (uncachedInFlight.zip(uncachedReqs)) :+ (s2_valid && s2_req.no_alloc, s2_req) + val s1_uncached_hits = VecInit(concerns.map { c => + val concern_wmask = new StoreGen(c._2.size, c._2.addr, 0.U, wordBytes).mask + val addr_match = (c._2.addr ^ s1_paddr)(pgIdxBits + pgLevelBits - 1, log2Ceil(wordBytes)) === 0.U + val mask_match = (concern_wmask & s1_mask_xwr).orR || c._2.cmd === M_PWR || s1_req.cmd === M_PWR + val cmd_match = isWrite(c._2.cmd) || isWrite(s1_req.cmd) + c._1 && s1_need_check && cmd_match && addr_match && mask_match + }) + + val s2_uncached_hits = RegEnable(s1_uncached_hits.asUInt, s1_valid_not_nacked) + s2_uncached_hits.orR + } + val s2_valid_hit_pre_data_ecc_and_waw = + s2_valid_hit_maybe_flush_pre_data_ecc_and_waw && s2_readwrite && !s2_no_alloc_hazard + val s2_valid_flush_line = s2_valid_hit_maybe_flush_pre_data_ecc_and_waw && s2_cmd_flush_line + val s2_valid_hit_pre_data_ecc = s2_valid_hit_pre_data_ecc_and_waw && (!s2_waw_hazard || s2_store_merge) + val s2_valid_data_error = s2_valid_hit_pre_data_ecc_and_waw && s2_data_error + val s2_valid_hit = s2_valid_hit_pre_data_ecc && !s2_data_error + val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_meta_error && !s2_hit + val s2_uncached = !s2_pma.cacheable || s2_req.no_alloc && !s2_pma.must_alloc && !s2_hit_valid + val s2_valid_cached_miss = s2_valid_miss && !s2_uncached && !uncachedInFlight.asUInt.orR + dontTouch(s2_valid_cached_miss) + val s2_want_victimize = + (!usingDataScratchpad).B && (s2_valid_cached_miss || s2_valid_flush_line || s2_valid_data_error || s2_flush_valid) + val s2_cannot_victimize = !s2_flush_valid && io.cpu.s2_kill + val s2_victimize = s2_want_victimize && !s2_cannot_victimize + val s2_valid_uncached_pending = s2_valid_miss && 
s2_uncached && !uncachedInFlight.asUInt.andR + val s2_victim_way = UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)) + val s2_victim_or_hit_way = Mux(s2_hit_valid, s2_hit_way, s2_victim_way) + val s2_victim_tag = Mux( + s2_valid_data_error || s2_valid_flush_line, + s2_req.addr(paddrBits - 1, tagLSB), + Mux1H(s2_victim_way, s2_meta_corrected).tag + ) + val s2_victim_state: ClientMetadata = Mux(s2_hit_valid, s2_hit_state, Mux1H(s2_victim_way, s2_meta_corrected).coh) + + val s2_victim_dirty = s2_victim_state.state === 3.U + dontTouch(s2_victim_dirty) + val s2_update_meta = s2_hit_state.state =/= s2_new_hit_state.state + val s2_dont_nack_uncached = s2_valid_uncached_pending && io.loadStoreAXI.aw.ready + val s2_dont_nack_misc = s2_valid_masked && !s2_meta_error && + (supports_flush.B && s2_cmd_flush_all && flushed && !flushing || + supports_flush.B && s2_cmd_flush_line && !s2_hit || + s2_req.cmd === M_WOK) + io.cpu.s2_nack := s2_valid_no_xcpt && !s2_dont_nack_uncached && !s2_dont_nack_misc && !s2_valid_hit + when(io.cpu.s2_nack || (s2_valid_hit_pre_data_ecc_and_waw && s2_update_meta)) { s1_nack := true.B } + + // tag updates on ECC errors + val s2_first_meta_corrected = PriorityMux(s2_meta_correctable_errors, s2_meta_corrected) + metaArb.io.in(1).valid := s2_meta_error && (s2_valid_masked || s2_flush_valid_pre_tag_ecc) + metaArb.io.in(1).bits.write := true.B + metaArb.io.in(1).bits.way_en := s2_meta_uncorrectable_errors | Mux( + s2_meta_error_uncorrectable, + 0.U, + PriorityEncoderOH(s2_meta_correctable_errors) + ) + + // tag updates on hit + metaArb.io.in(2).valid := s2_valid_hit_pre_data_ecc_and_waw && s2_update_meta + metaArb.io.in(2).bits.write := !io.cpu.s2_kill + metaArb.io.in(2).bits.way_en := s2_victim_or_hit_way + metaArb.io.in(2).bits.idx := s2_vaddr(idxMSB, idxLSB) + metaArb.io.in(2).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_vaddr(idxMSB, 0)) + metaArb.io.in(2).bits.data := tECC.encode(L1Metadata(s2_req.addr >> tagLSB, s2_new_hit_state).asUInt) + + // load reservations and TL error reporting + val s2_lr = (usingAtomics && !usingDataScratchpad).B && s2_req.cmd === M_XLR + val s2_sc = (usingAtomics && !usingDataScratchpad).B && s2_req.cmd === M_XSC + val lrscCount = RegInit(0.U) + val lrscValid = lrscCount > lrscBackoff.U + val lrscBackingOff = lrscCount > 0.U && !lrscValid + val lrscAddr = Reg(UInt()) + val lrscAddrMatch = lrscAddr === (s2_req.addr >> blockOffBits) + val s2_sc_fail = s2_sc && !(lrscValid && lrscAddrMatch) + when((s2_valid_hit && s2_lr && !cached_grant_wait || s2_valid_cached_miss) && !io.cpu.s2_kill) { + lrscCount := Mux(s2_hit, (lrscCycles - 1).U, 0.U) + lrscAddr := s2_req.addr >> blockOffBits + } + when(lrscCount > 0.U) { lrscCount := lrscCount - 1.U } + when(s2_valid_not_killed && lrscValid) { lrscCount := lrscBackoff.U } + + // don't perform data correction if it might clobber a recent store + val s2_correct = + s2_data_error && !any_pstore_valid && !RegNext(any_pstore_valid || s2_valid) && usingDataScratchpad.B + // pending store buffer + val s2_valid_correct = s2_valid_hit_pre_data_ecc_and_waw && s2_correct && !io.cpu.s2_kill + def s2_store_valid_pre_kill = s2_valid_hit && s2_write && !s2_sc_fail + def s2_store_valid = s2_store_valid_pre_kill && !io.cpu.s2_kill + val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write) + val pstore1_addr = RegEnable(s1_vaddr, s1_valid_not_nacked && s1_write) + val pstore1_data = RegEnable(io.cpu.s1_data.data, s1_valid_not_nacked && s1_write) + val pstore1_way = 
RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write) + val pstore1_mask = RegEnable(s1_mask, s1_valid_not_nacked && s1_write) + val pstore1_storegen_data = WireDefault(pstore1_data) + val pstore1_rmw = usingRMW.B && RegEnable(needsRead(s1_req), s1_valid_not_nacked && s1_write) + val pstore1_merge_likely = s2_valid_not_nacked_in_s1 && s2_write && s2_store_merge + val pstore1_merge = s2_store_valid && s2_store_merge + val pstore2_valid = RegInit(false.B) + val pstore_drain_opportunistic = + !(io.cpu.req.valid && likelyNeedsRead(io.cpu.req.bits)) && !(s1_valid && s1_waw_hazard) + val pstore_drain_on_miss = releaseInFlight || RegNext(io.cpu.s2_nack) + val pstore1_held = RegInit(false.B) + val pstore1_valid_likely = s2_valid && s2_write || pstore1_held + def pstore1_valid_not_rmw(s2_kill: Bool) = s2_valid_hit_pre_data_ecc && s2_write && !s2_kill || pstore1_held + val pstore1_valid = s2_store_valid || pstore1_held + any_pstore_valid := pstore1_held || pstore2_valid + val pstore_drain_structural = pstore1_valid_likely && pstore2_valid && ((s1_valid && s1_write) || pstore1_rmw) + assert(pstore1_rmw || pstore1_valid_not_rmw(io.cpu.s2_kill) === pstore1_valid) + ccover(pstore_drain_structural, "STORE_STRUCTURAL_HAZARD", "D$ read-modify-write structural hazard") + ccover(pstore1_valid && pstore_drain_on_miss, "STORE_DRAIN_ON_MISS", "D$ store buffer drain on miss") + ccover(s1_valid_not_nacked && s1_waw_hazard, "WAW_HAZARD", "D$ write-after-write hazard") + def should_pstore_drain(truly: Bool) = { + val s2_kill = truly && io.cpu.s2_kill + !pstore1_merge_likely && + (usingRMW.B && pstore_drain_structural || + (((pstore1_valid_not_rmw( + s2_kill + ) && !pstore1_rmw) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss))) + } + val pstore_drain = should_pstore_drain(true.B) + pstore1_held := (s2_store_valid && !s2_store_merge || pstore1_held) && pstore2_valid && !pstore_drain + val advance_pstore1 = (pstore1_valid || s2_valid_correct) && (pstore2_valid === pstore_drain) + pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1 + val pstore2_addr = RegEnable(Mux(s2_correct, s2_vaddr, pstore1_addr), advance_pstore1) + val pstore2_way = RegEnable(Mux(s2_correct, s2_hit_way, pstore1_way), advance_pstore1) + val pstore2_storegen_data = VecInit({ + for (i <- 0 until wordBytes) + yield RegEnable( + pstore1_storegen_data(8 * (i + 1) - 1, 8 * i), + advance_pstore1 || pstore1_merge && pstore1_mask(i) + ) + }).asUInt + val pstore2_storegen_mask = { + val mask = Reg(UInt(wordBytes.W)) + when(advance_pstore1 || pstore1_merge) { + val mergedMask = pstore1_mask | Mux(pstore1_merge, mask, 0.U) + mask := ~Mux(s2_correct, 0.U, ~mergedMask) + } + mask + } + s2_store_merge := (if (eccBytes == 1) false.B + else { + ccover(pstore1_merge, "STORE_MERGED", "D$ store merged") + // only merge stores to ECC granules that are already stored-to, to avoid + // WAW hazards + val wordMatch = (eccMask(pstore2_storegen_mask) | ~eccMask(pstore1_mask)).andR + val idxMatch = s2_vaddr(untagBits - 1, log2Ceil(wordBytes)) === pstore2_addr( + untagBits - 1, + log2Ceil(wordBytes) + ) + val tagMatch = (s2_hit_way & pstore2_way).orR + pstore2_valid && wordMatch && idxMatch && tagMatch + }) + dataArb.io.in(0).valid := should_pstore_drain(false.B) + dataArb.io.in(0).bits.write := pstore_drain + dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) + dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) + dataArb.io.in(0).bits.wdata := encodeData( + Fill(rowWords, 
Mux(pstore2_valid, pstore2_storegen_data, pstore1_data)), + false.B + ) + dataArb.io.in(0).bits.wordMask := { + // val eccMask = dataArb.io.in(0).bits.eccMask.asBools.grouped(subWordBytes / eccBytes).map(_.orR).toSeq.asUInt + val eccMask = VecInit(grouped(VecInit(dataArb.io.in(0).bits.eccMask.asBools), subWordBytes / eccBytes).map(_.asUInt.orR)).asUInt + val wordMask = UIntToOH( + if (rowOffBits == log2Ceil(wordBytes)) 0.U + else Mux(pstore2_valid, pstore2_addr, pstore1_addr)(rowOffBits - 1, log2Ceil(wordBytes)) + ) + FillInterleaved(wordBytes / subWordBytes, wordMask) & Fill(rowBytes / wordBytes, eccMask) + } + dataArb.io.in(0).bits.eccMask := eccMask(Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask)) + + // store->load RAW hazard detection + def s1Depends(addr: UInt, mask: UInt) = + addr(idxMSB, wordOffBits) === s1_vaddr(idxMSB, wordOffBits) && + Mux(s1_write, (eccByteMask(mask) & eccByteMask(s1_mask_xwr)).orR, (mask & s1_mask_xwr).orR) + val s1_hazard = + (pstore1_valid_likely && s1Depends(pstore1_addr, pstore1_mask)) || + (pstore2_valid && s1Depends(pstore2_addr, pstore2_storegen_mask)) + val s1_raw_hazard = s1_read && s1_hazard + s1_waw_hazard := (if (eccBytes == 1) false.B + else { + ccover(s1_valid_not_nacked && s1_waw_hazard, "WAW_HAZARD", "D$ write-after-write hazard") + s1_write && (s1_hazard || needsRead(s1_req) && !s1_did_read) + }) + when(s1_valid && s1_raw_hazard) { s1_nack := true.B } + + // performance hints to processor + io.cpu.s2_nack_cause_raw := RegNext(s1_raw_hazard) || !(!s2_waw_hazard || s2_store_merge) + + // Prepare a TileLink request message that initiates a transaction + val a_source = PriorityEncoder(~uncachedInFlight.asUInt << mmioOffset) // skip the MSHR + val acquire_address = (s2_req.addr >> idxLSB) << idxLSB + val access_address = s2_req.addr + val a_size = s2_req.size + val a_data = Fill(beatWords, pstore1_data) + val a_mask = pstore1_mask << ((if (log2Ceil(beatBytes) == log2Ceil(wordBytes)) 0.U else access_address(log2Ceil(beatBytes) - 1, log2Ceil(wordBytes))) << 3) + val memAccessValid = !io.cpu.s2_kill && + (s2_valid_uncached_pending || + (s2_valid_cached_miss && + !(release_ack_wait && (s2_req.addr ^ release_ack_addr)( + ((pgIdxBits + pgLevelBits).min(paddrBits)) - 1, + idxLSB + ) === 0.U) && + (cacheParams.acquireBeforeRelease.B && !release_ack_wait && release_queue_empty || !s2_victim_dirty))) + // !s2_uncached -> read cache line + val accessWillRead: Bool = !s2_uncached || !s2_write + // If no managers support atomics, assert fail if processor asks for them + assert(!(memAccessValid && s2_read && s2_write && s2_uncached)) + io.loadStoreAXI.ar.valid := memAccessValid && accessWillRead + io.loadStoreAXI.ar.bits := DontCare + io.loadStoreAXI.ar.bits.burst := 1.U + io.loadStoreAXI.ar.bits.addr := Mux( + s2_uncached, + access_address, + access_address >> parameter.lgCacheBlockBytes << parameter.lgCacheBlockBytes + ) + io.loadStoreAXI.ar.bits.len := Mux( + s2_uncached, + 0.U, + (parameter.cacheBlockBytes * 8 / parameter.loadStoreParameter.dataWidth - 1).U + ) + io.loadStoreAXI.ar.bits.size := Mux(s2_uncached, a_size, parameter.lgCacheBlockBytes.U) + io.loadStoreAXI.ar.bits.id := a_source + io.loadStoreAXI.ar.bits.user := s2_uncached + + io.loadStoreAXI.aw.valid := memAccessValid && !accessWillRead + io.loadStoreAXI.aw.bits := DontCare + io.loadStoreAXI.aw.bits.burst := 1.U + io.loadStoreAXI.aw.bits.addr := access_address + io.loadStoreAXI.aw.bits.len := 0.U + io.loadStoreAXI.aw.bits.size := a_size + + val dataQueue: Queue[W] = Module(new 
Queue(chiselTypeOf(io.loadStoreAXI.w.bits), cacheDataBeats)) + dataQueue.io.enq.valid := memAccessValid && !accessWillRead + dataQueue.io.enq.bits.data := a_data + dataQueue.io.enq.bits.strb := a_mask + dataQueue.io.enq.bits.last := true.B + dataQueue.io.enq.bits.user := true.B // always uc + io.loadStoreAXI.w <> dataQueue.io.deq + +// // Drive APROT Bits +// tl_out_a.bits.user.lift(AMBAProt).foreach { x => +// val user_bit_cacheable = s2_pma.cacheable +// +// x.privileged := s2_req.dprv === PRV.M.U || user_bit_cacheable +// // if the address is cacheable, enable outer caches +// x.bufferable := user_bit_cacheable +// x.modifiable := user_bit_cacheable +// x.readalloc := user_bit_cacheable +// x.writealloc := user_bit_cacheable +// +// // Following are always tied off +// x.fetch := false.B +// x.secure := true.B +// } + + // Set pending bits for outstanding TileLink transaction + val a_sel = UIntToOH(a_source, maxUncachedInFlight + mmioOffset) >> mmioOffset + when(io.loadStoreAXI.ar.fire || io.loadStoreAXI.aw.fire) { + when(s2_uncached) { + (a_sel.asBools.zip(uncachedInFlight.zip(uncachedReqs))).foreach { + case (s, (f, r)) => + when(s) { + f := true.B + r := s2_req + r.cmd := Mux(s2_write, Mux(s2_req.cmd === M_PWR, M_PWR, M_XWR), M_XRD) + } + } + }.otherwise { + cached_grant_wait := true.B + refill_way := s2_victim_or_hit_way + } + } + + def axiHelper(x: AXI4ChiselBundle, fire: Bool): (Bool, Bool, Bool, UInt) = { + // same as len + val count = RegInit(0.U(8.W)) + val first = count === 0.U + val last: Bool = x match { + case r: R => r.last + case w: W => w.last + case _ => true.B + } + val done = last && fire + when(fire) { + count := Mux(last, 0.U, count + 1.U) + } + (first, last, done, count) + } + + // grant + val (d_first, d_last, d_done, d_refill_count) = axiHelper(io.loadStoreAXI.r.bits, io.loadStoreAXI.r.fire) +// val (d_opc, grantIsUncached, grantIsUncachedData) = { +// val uncachedGrantOpcodesSansData = Seq(AccessAck, HintAck) +// val uncachedGrantOpcodesWithData = Seq(AccessAckData) +// val uncachedGrantOpcodes = uncachedGrantOpcodesWithData ++ uncachedGrantOpcodesSansData +// val whole_opc = tl_out.d.bits.opcode +// if (usingDataScratchpad) { +// assert(!tl_out.d.valid || whole_opc.isOneOf(uncachedGrantOpcodes)) +// // the only valid TL-D messages are uncached, so we can do some pruning +// val opc = whole_opc(uncachedGrantOpcodes.map(_.getWidth).max - 1, 0) +// val data = DecodeLogic(opc, uncachedGrantOpcodesWithData, uncachedGrantOpcodesSansData) +// (opc, true.B, data) +// } else { +// (whole_opc, whole_opc.isOneOf(uncachedGrantOpcodes), whole_opc.isOneOf(uncachedGrantOpcodesWithData)) +// } +// } + tl_d_data_encoded := encodeData( + io.loadStoreAXI.r.bits.data, + // tl_out.d.bits.corrupt && !io.ptw.customCSRs.suppressCorruptOnGrantData && !grantIsUncached + false.B + ) + val grantIsUncachedData = io.loadStoreAXI.r.bits.user(0) + val grantIsCached = !io.loadStoreAXI.r.bits.user(0) + val grantIsRefill = grantIsCached // Writes the data array + val grantInProgress = RegInit(false.B) + val blockProbeAfterGrantCount = RegInit(0.U) + when(blockProbeAfterGrantCount > 0.U) { blockProbeAfterGrantCount := blockProbeAfterGrantCount - 1.U } + // !release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta, s_voluntary_aw) + val canAcceptCachedGrant = !Seq(s_voluntary_writeback, s_voluntary_write_meta, s_voluntary_aw).map(_ === release_state).reduce(_ || _) + io.loadStoreAXI.r.ready := Mux(grantIsCached, canAcceptCachedGrant, true.B) + val uncachedRespIdxOH = 
(UIntToOH(io.loadStoreAXI.r.bits.id, maxUncachedInFlight + mmioOffset) >> mmioOffset).asUInt + uncachedResp := Mux1H(uncachedRespIdxOH, uncachedReqs) + when(io.loadStoreAXI.r.fire) { + when(grantIsCached) { + grantInProgress := true.B + assert(cached_grant_wait, "A GrantData was unexpected by the dcache.") + when(d_last) { + cached_grant_wait := false.B + grantInProgress := false.B + blockProbeAfterGrantCount := (blockProbeAfterGrantCycles - 1).U + replacer.miss + } + }.otherwise { + (uncachedRespIdxOH.asBools.zip(uncachedInFlight)).foreach { + case (s, f) => + when(s && d_last) { + assert(f, "An AccessAck was unexpected by the dcache.") // TODO must handle Ack coming back on same cycle! + f := false.B + } + } + // r always has data + if (!cacheParams.separateUncachedResp) { + if (!cacheParams.pipelineWayMux) + s1_data_way := 1.U << nWays + s2_req.cmd := M_XRD + s2_req.size := uncachedResp.size + s2_req.signed := uncachedResp.signed + s2_req.tag := uncachedResp.tag + s2_req.addr := { + require(rowOffBits >= beatOffBits) + val dontCareBits = s1_paddr >> rowOffBits << rowOffBits + dontCareBits | uncachedResp.addr(beatOffBits - 1, 0) + } + s2_uncached_resp_addr := uncachedResp.addr + } + } + } + + io.loadStoreAXI.b.ready := true.B + when(io.loadStoreAXI.b.fire) { + assert( + release_ack_wait, + "A ReleaseAck was unexpected by the dcache." + ) // TODO should handle Ack coming back on same cycle! + release_ack_wait := false.B + } + + // Finish TileLink transaction by issuing a GrantAck + // tl_out.e.valid := tl_out.d.valid && d_first && grantIsCached && canAcceptCachedGrant + // tl_out.e.bits := edge.GrantAck(tl_out.d.bits) + // assert(tl_out.e.fire === (tl_out.d.fire && d_first && grantIsCached)) + + // data refill + // note this ready-valid signaling ignores E-channel backpressure, which + // benignly means the data RAM might occasionally be redundantly written + dataArb.io.in(1).valid := io.loadStoreAXI.r.valid && grantIsRefill && canAcceptCachedGrant + when(grantIsRefill && !dataArb.io.in(1).ready) { + // tl_out.e.valid := false.B + // tl_out.d.ready := false.B + io.loadStoreAXI.r.ready := false.B + } + if (!usingDataScratchpad) { + dataArb.io.in(1).bits.write := true.B + dataArb.io.in(1).bits.addr := + (s2_vaddr >> idxLSB) << idxLSB | + (d_refill_count << log2Ceil(parameter.loadStoreParameter.dataWidth / 8)) + dataArb.io.in(1).bits.way_en := refill_way + dataArb.io.in(1).bits.wdata := tl_d_data_encoded + dataArb.io.in(1).bits.wordMask := ~0.U((rowBytes / subWordBytes).W) + dataArb.io.in(1).bits.eccMask := ~0.U((wordBytes / eccBytes).W) + } else { + dataArb.io.in(1).bits := dataArb.io.in(0).bits + } + + // tag updates on refill + // ignore backpressure from metaArb, which can only be caused by tag ECC + // errors on hit-under-miss. failing to write the new tag will leave the + // line invalid, so we'll simply request the line again later. +// metaArb.io.in(3).valid := grantIsCached && d_done && !tl_out.d.bits.denied + metaArb.io.in(3).valid := grantIsCached && d_done + metaArb.io.in(3).bits.write := true.B + metaArb.io.in(3).bits.way_en := refill_way + metaArb.io.in(3).bits.idx := s2_vaddr(idxMSB, idxLSB) + metaArb.io.in(3).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_vaddr(idxMSB, 0)) + metaArb.io.in(3).bits.data := tECC.encode( + L1Metadata(s2_req.addr >> tagLSB, s2_new_hit_state).asUInt + ) + + if (!cacheParams.separateUncachedResp) { + // don't accept uncached grants if there's a structural hazard on s2_data... 
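+ // blockUncachedGrant (below) remembers that the data array was claimed in
+ // the previous cycle. While an uncached data grant is stalled, a dummy
+ // read is forced through dataArb port 1 (valid asserted, write := false.B)
+ // purely to open a slot in the array, so the grant cannot starve forever
+ // behind a stream of CPU hit-path reads.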
+ val blockUncachedGrant = Reg(Bool()) + blockUncachedGrant := dataArb.io.out.valid + when(grantIsUncachedData && (blockUncachedGrant || s1_valid)) { + io.loadStoreAXI.r.ready := false.B + // ...but insert bubble to guarantee grant's eventual forward progress + when(io.loadStoreAXI.r.valid) { + io.cpu.req.ready := false.B + dataArb.io.in(1).valid := true.B + dataArb.io.in(1).bits.write := false.B + blockUncachedGrant := !dataArb.io.in(1).ready + } + } + } + ccover(io.loadStoreAXI.r.valid && !io.loadStoreAXI.r.ready, "BLOCK_D", "D$ D-channel blocked") + + // no probe + metaArb.io.in(6).valid := false.B + metaArb.io.in(6).bits := DontCare + + // replacement policy + s1_victim_way := (if (replacer.perSet && nWays > 1) { + val repl_array = Mem(nSets, UInt(replacer.nBits.W)) + val s1_repl_idx = s1_req.addr(idxBits + blockOffBits - 1, blockOffBits) + val s2_repl_idx = s2_vaddr(idxBits + blockOffBits - 1, blockOffBits) + val s2_repl_state = Reg(UInt(replacer.nBits.W)) + val s2_new_repl_state = replacer.get_next_state(s2_repl_state, OHToUInt(s2_hit_way)) + val s2_repl_wen = s2_valid_masked && s2_hit_way.orR && s2_repl_state =/= s2_new_repl_state + val s1_repl_state = + Mux(s2_repl_wen && s2_repl_idx === s1_repl_idx, s2_new_repl_state, repl_array(s1_repl_idx)) + when(s1_valid_not_nacked) { s2_repl_state := s1_repl_state } + + val waddr = Mux(resetting, flushCounter(idxBits - 1, 0), s2_repl_idx) + val wdata = Mux(resetting, 0.U, s2_new_repl_state) + val wen = resetting || s2_repl_wen + when(wen) { repl_array(waddr) := wdata } + + replacer.get_replace_way(s1_repl_state) + } else { + replacer.way + }) + + // release + val (_, _, releaseDone, c_count) = axiHelper(io.loadStoreAXI.w.bits, io.loadStoreAXI.w.fire) + val releaseRejected = Wire(Bool()) + val s1_release_data_valid = RegNext(dataArb.io.in(2).fire) + val s2_release_data_valid = RegNext(s1_release_data_valid && !releaseRejected) + releaseRejected := s2_release_data_valid && !io.loadStoreAXI.w.fire + val releaseDataBeat = + Cat(0.U, c_count) + Mux(releaseRejected, 0.U, s1_release_data_valid + Cat(0.U, s2_release_data_valid)) + val s1_release_last: Bool = RegEnable(releaseDataBeat === (refillCycles - 1).U, dataArb.io.in(2).fire) + val s2_release_last: Bool = RegEnable(s1_release_last, s1_release_data_valid && !releaseRejected) + + when(awState) { + io.loadStoreAXI.aw.valid := true.B + io.loadStoreAXI.aw.bits.addr := releaseAddress >> parameter.lgCacheBlockBytes << parameter.lgCacheBlockBytes + io.loadStoreAXI.aw.bits.len := (parameter.cacheBlockBytes * 8 / parameter.loadStoreParameter.dataWidth - 1).U + io.loadStoreAXI.aw.bits.size := parameter.lgCacheBlockBytes.U + io.loadStoreAXI.aw.bits.id := (mmioOffset - 1).U + } + + when(s2_release_data_valid) { + io.loadStoreAXI.w.valid := true.B + io.loadStoreAXI.w.bits := DontCare + io.loadStoreAXI.w.bits.data := s2_data_corrected + io.loadStoreAXI.w.bits.strb := (-1.S(io.loadStoreAXI.w.bits.strb.getWidth.W)).asUInt + io.loadStoreAXI.w.bits.last := s2_release_last + // tl_out_c.bits.corrupt := inWriteback && s2_data_error_uncorrectable + } + + val newCoh = ClientMetadata(0.U(2.W)) + releaseWay := s2_victim_or_hit_way + + if (!usingDataScratchpad) { + when(s2_victimize) { + assert(s2_valid_flush_line || s2_flush_valid || io.cpu.s2_nack) + val discard_line = s2_valid_flush_line && s2_req.size(1) || s2_flush_valid && flushing_req.size(1) + release_state := Mux( + s2_victim_dirty && !discard_line, + s_voluntary_aw, + s_voluntary_write_meta + ) + releaseAddress := Cat(s2_victim_tag, s2_req.addr(tagLSB - 1, 
idxLSB) << idxLSB) + } + + when(awState) { + when(io.loadStoreAXI.aw.ready) { + release_state := s_voluntary_writeback + release_ack_wait := true.B + release_ack_addr := releaseAddress + } + } + + when(release_state === s_voluntary_writeback) { + when(releaseDone) { release_state := s_voluntary_write_meta } + } + } + + dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles.U + dataArb.io.in(2).bits := dataArb.io.in(1).bits + dataArb.io.in(2).bits.write := false.B + dataArb.io.in(2).bits.addr := (probeIdx(releaseAddress) << blockOffBits).asUInt | (releaseDataBeat( + log2Ceil(refillCycles) - 1, + 0 + ) << rowOffBits) + dataArb.io.in(2).bits.wordMask := ~0.U((rowBytes / subWordBytes).W) + dataArb.io.in(2).bits.eccMask := ~0.U((wordBytes / eccBytes).W) + dataArb.io.in(2).bits.way_en := ~0.U(nWays.W) + + metaArb.io.in(4).valid := release_state === s_voluntary_write_meta + metaArb.io.in(4).bits.write := true.B + metaArb.io.in(4).bits.way_en := releaseWay + metaArb.io.in(4).bits.idx := probeIdx(releaseAddress) + metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, releaseAddress(idxMSB, 0)) + metaArb.io.in(4).bits.data := tECC.encode(L1Metadata(releaseAddress >> tagLSB, newCoh).asUInt) + when(metaArb.io.in(4).fire) { release_state := s_ready } + + // cached response + (io.cpu.resp.bits: Data).waiveAll :<>= (s2_req: Data).waiveAll + io.cpu.resp.bits.has_data := s2_read + io.cpu.resp.bits.replay := false.B + io.cpu.s2_uncached := s2_uncached && !s2_hit + io.cpu.s2_paddr := s2_req.addr + io.cpu.s2_gpa := s2_tlb_xcpt.gpa + io.cpu.s2_gpa_is_pte := s2_tlb_xcpt.gpa_is_pte + + // report whether there are any outstanding accesses. disregard any + // slave-port accesses, since they don't affect local memory ordering. + val s1_isSlavePortAccess = s1_req.no_xcpt + val s2_isSlavePortAccess = s2_req.no_xcpt + io.cpu.ordered := !(s1_valid && !s1_isSlavePortAccess || s2_valid && !s2_isSlavePortAccess || cached_grant_wait || uncachedInFlight.asUInt.orR) + + val s1_xcpt_valid = tlb.io.req.valid && !s1_isSlavePortAccess && !s1_nack + io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), s2_tlb_xcpt, 0.U.asTypeOf(s2_tlb_xcpt)) + + if (usingDataScratchpad) { + assert(!(s2_valid_masked && (s2_req.cmd === M_XLR || s2_req.cmd === M_XSC))) + } else { + // ccover(tl_out.b.valid && !tl_out.b.ready, "BLOCK_B", "D$ B-channel blocked") + } + + // uncached response + val s1_uncached_data_word = { + val word_idx = if(log2Ceil(rowBits / 8) == log2Ceil(wordBytes)) 0.U else uncachedResp.addr(log2Ceil(rowBits / 8) - 1, log2Ceil(wordBytes)) + val words: Seq[UInt] = grouped(io.loadStoreAXI.r.bits.data, wordBits) + Mux1H(UIntToOH(word_idx), words) + } + val s2_uncached_data_word = RegEnable(s1_uncached_data_word, io.cpu.replay_next) + val doUncachedResp = RegNext(io.cpu.replay_next) + io.cpu.resp.valid := (s2_valid_hit_pre_data_ecc || doUncachedResp) && !s2_data_error + io.cpu.replay_next := io.loadStoreAXI.r.fire && grantIsUncachedData && !cacheParams.separateUncachedResp.B + when(doUncachedResp) { + assert(!s2_valid_hit) + io.cpu.resp.bits.replay := true.B + io.cpu.resp.bits.addr := s2_uncached_resp_addr + } + + io.cpu.uncached_resp.map { resp => + resp.valid := io.loadStoreAXI.r.valid && grantIsUncachedData + resp.bits.tag := uncachedResp.tag + resp.bits.size := uncachedResp.size + resp.bits.signed := uncachedResp.signed + resp.bits.data := new LoadGen( + uncachedResp.size, + uncachedResp.signed, + uncachedResp.addr, + s1_uncached_data_word, + false.B, + wordBytes + ).data + resp.bits.data_raw := 
s1_uncached_data_word + when(grantIsUncachedData && !resp.ready) { + io.loadStoreAXI.r.ready := false.B + } + } + + // load data subword mux/sign extension + val s2_data_word = (0 until rowBits by wordBits).map(i => s2_data_uncorrected(wordBits + i - 1, i)).reduce(_ | _) + val s2_data_word_corrected = + (0 until rowBits by wordBits).map(i => s2_data_corrected(wordBits + i - 1, i)).reduce(_ | _) + val s2_data_word_possibly_uncached = + Mux(cacheParams.pipelineWayMux.B && doUncachedResp, s2_uncached_data_word, 0.U) | s2_data_word + val loadgen = new LoadGen(s2_req.size, s2_req.signed, s2_req.addr, s2_data_word_possibly_uncached, s2_sc, wordBytes) + io.cpu.resp.bits.data := loadgen.data | s2_sc_fail + io.cpu.resp.bits.data_word_bypass := loadgen.wordData + io.cpu.resp.bits.data_raw := s2_data_word + io.cpu.resp.bits.store_data := pstore1_data + + // AMOs + amoalus.map { amoalus => + amoalus.zipWithIndex.map { case(amoalu, i) => + amoalu.io.mask := pstore1_mask >> (i * (parameter.xLen / 8)) + amoalu.io.cmd := (if (usingAtomicsInCache) pstore1_cmd else M_XWR) + amoalu.io.lhs := s2_data_word >> (i * parameter.xLen) + amoalu.io.rhs := pstore1_data >> (i * parameter.xLen) + amoalu + } + pstore1_storegen_data := (if (!usingDataScratchpad) VecInit(amoalus.map(_.io.out)).asUInt + else { + val mask = FillInterleaved(8, Mux(s2_correct, 0.U, pstore1_mask)) + VecInit(amoalus.map(_.io.out_unmasked)).asUInt & mask | s2_data_word_corrected & ~mask + }) + }.getOrElse { + if (!usingAtomics) { + assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation") + } + } + + // flushes + if (!usingDataScratchpad) + when(RegNext(reset.asBool)) { resetting := true.B } + val flushCounterNext = flushCounter +& 1.U + val flushDone = (flushCounterNext >> log2Ceil(nSets)) === nWays.U + val flushCounterWrap = flushCounterNext(log2Ceil(nSets) - 1, 0) + ccover( + s2_valid_masked && s2_cmd_flush_all && s2_meta_error, + "TAG_ECC_ERROR_DURING_FENCE_I", + "D$ ECC error in tag array during cache flush" + ) + ccover( + s2_valid_masked && s2_cmd_flush_all && s2_data_error, + "DATA_ECC_ERROR_DURING_FENCE_I", + "D$ ECC error in data array during cache flush" + ) + s1_flush_valid := metaArb.io + .in(5) + .fire && !s1_flush_valid && !s2_flush_valid_pre_tag_ecc && release_state === s_ready && !release_ack_wait + metaArb.io.in(5).valid := flushing && !flushed + metaArb.io.in(5).bits.write := false.B + metaArb.io.in(5).bits.idx := flushCounter(idxBits - 1, 0) + metaArb.io.in(5).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.in(5).bits.idx << blockOffBits) + metaArb.io.in(5).bits.way_en := metaArb.io.in(4).bits.way_en + metaArb.io.in(5).bits.data := metaArb.io.in(4).bits.data + + // Only flush D$ on FENCE.I if some cached executable regions are untracked. 
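+ // During a flush, flushCounter enumerates every (way, set) pair: the low
+ // idxBits bits index the set and the high bits select the way, which is
+ // why s1_victim_way is driven from flushCounter >> log2Ceil(nSets) below
+ // and flushDone fires once flushCounterNext >> log2Ceil(nSets) === nWays.U.
+ // Equivalent software-style sketch (explanatory only, flushLine is
+ // hypothetical):
+ //   for (way <- 0 until nWays; set <- 0 until nSets) flushLine(way, set)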
+ if (supports_flush) { + when(s2_valid_masked && s2_cmd_flush_all) { + when(!flushed && !io.cpu.s2_kill && !release_ack_wait && !uncachedInFlight.asUInt.orR) { + flushing := true.B + flushing_req := s2_req + } + } + + // when(tl_out_a.fire && !s2_uncached) { flushed := false.B } + when(io.loadStoreAXI.aw.fire && !s2_uncached) { flushed := false.B } + when(flushing) { + s1_victim_way := flushCounter >> log2Ceil(nSets) + when(s2_flush_valid) { + flushCounter := flushCounterNext + when(flushDone) { + flushed := true.B + if (!isPow2(nWays)) flushCounter := flushCounterWrap + } + } + when(flushed && release_state === s_ready && !release_ack_wait) { + flushing := false.B + } + } + } + metaArb.io.in(0).valid := resetting + metaArb.io.in(0).bits := metaArb.io.in(5).bits + metaArb.io.in(0).bits.write := true.B + metaArb.io.in(0).bits.way_en := ~0.U(nWays.W) + metaArb.io.in(0).bits.data := tECC.encode(L1Metadata(0.U, ClientMetadata(0.U)).asUInt) + when(resetting) { + flushCounter := flushCounterNext + when(flushDone) { + resetting := false.B + if (!isPow2(nWays)) flushCounter := flushCounterWrap + } + } + + // gate the clock + clock_en_reg := !cacheParams.clockGate.B || + //io.ptw.customCSRs.disableDCacheClockGate || // todo: customCSRs? + io.cpu.keep_clock_enabled || + metaArb.io.out.valid || // subsumes resetting || flushing + //s1Release || s2_release || + s1_valid || s2_valid || + // tlb_port.req.valid || + // s1_tlb_req_valid || s2_tlb_req_valid || + pstore1_held || pstore2_valid || + release_state =/= s_ready || + release_ack_wait || !release_queue_empty || + !tlb.io.req.ready || + cached_grant_wait || uncachedInFlight.asUInt.orR || + lrscCount > 0.U || blockProbeAfterGrantCount > 0.U + + // performance events + io.cpu.perf.acquire := io.loadStoreAXI.ar.fire + io.cpu.perf.release := releaseDone + io.cpu.perf.grant := d_done + io.cpu.perf.tlbMiss := io.ptw.req.fire + io.cpu.perf.storeBufferEmptyAfterLoad := !((s1_valid && s1_write) || + ((s2_valid && s2_write && !s2_waw_hazard) || pstore1_held) || + pstore2_valid) + io.cpu.perf.storeBufferEmptyAfterStore := !((s1_valid && s1_write) || + (s2_valid && s2_write && pstore1_rmw) || + ((s2_valid && s2_write && !s2_waw_hazard || pstore1_held) && pstore2_valid)) + io.cpu.perf.canAcceptStoreThenLoad := !(((s2_valid && s2_write && pstore1_rmw) && (s1_valid && s1_write && !s1_waw_hazard)) || + (pstore2_valid && pstore1_valid_likely && (s1_valid && s1_write))) + io.cpu.perf.canAcceptStoreThenRMW := io.cpu.perf.canAcceptStoreThenLoad && !pstore2_valid + io.cpu.perf.canAcceptLoadThenLoad := !((s1_valid && s1_write && needsRead( + s1_req + )) && ((s2_valid && s2_write && !s2_waw_hazard || pstore1_held) || pstore2_valid)) + io.cpu.perf.blocked := { + // stop reporting blocked just before unblocking to avoid overly conservative stalling + /*val beatsBeforeEnd = outer.crossing match { + case SynchronousCrossing(_) => 2 + case RationalCrossing(_) => 1 // assumes 1 < ratio <= 2; need more bookkeeping for optimal handling of >2 + case _: AsynchronousCrossing => 1 // likewise + case _: CreditedCrossing => 1 // likewise + } + val near_end_of_refill = + if (cacheBlockBytes / beatBytes <= beatsBeforeEnd) io.loadStoreAXI.r.valid + else { + val refill_count = RegInit(0.U((cacheBlockBytes / beatBytes).log2.W)) + when(io.loadStoreAXI.r.fire && grantIsRefill) { refill_count := refill_count + 1.U } + refill_count >= (cacheBlockBytes / beatBytes - beatsBeforeEnd).U + } + cached_grant_wait && !near_end_of_refill*/ + false.B // todo: axi grant wait? 
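+ // With the TileLink crossing removed, the near-end-of-refill heuristic
+ // above stays commented out and the blocked hint is simply disabled; an
+ // AXI equivalent would presumably count R beats against the refill length
+ // (arlen) to stop reporting blocked just before the refill completes.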
+ } + + // report errors + val (data_error, data_error_uncorrectable, data_error_addr) = + if (usingDataScratchpad) (s2_valid_data_error, s2_data_error_uncorrectable, s2_req.addr) + else { + ( + RegNext(io.loadStoreAXI.w.fire && inWriteback && s2_data_error), + RegNext(s2_data_error_uncorrectable), + releaseAddress + ) // This is stable for a cycle after tl_out_c.fire, so don't need a register + } + { + val error_addr = + Mux( + metaArb.io.in(1).valid, + Cat(s2_first_meta_corrected.tag, metaArb.io.in(1).bits.addr(tagLSB - 1, idxLSB)), + data_error_addr >> idxLSB + ) << idxLSB + io.errors.uncorrectable.foreach { u => + u.valid := metaArb.io.in(1).valid && s2_meta_error_uncorrectable || data_error && data_error_uncorrectable + u.bits := error_addr + } + io.errors.correctable.foreach { c => + c.valid := metaArb.io.in(1).valid || data_error + c.bits := error_addr + io.errors.uncorrectable.foreach { u => when(u.valid) { c.valid := false.B } } + } + // io.errors.bus.valid := tl_out.d.fire && (tl_out.d.bits.denied || tl_out.d.bits.corrupt) + io.errors.bus.valid := false.B + io.errors.bus.bits := Mux(grantIsCached, s2_req.addr >> idxLSB << idxLSB, 0.U) + + ccoverNotScratchpad(io.errors.bus.valid && grantIsCached, "D_ERROR_CACHED", "D$ D-channel error, cached") + ccover(io.errors.bus.valid && !grantIsCached, "D_ERROR_UNCACHED", "D$ D-channel error, uncached") + } +// +// if (usingDataScratchpad) { +// val data_error_cover = Seq( +// property.CoverBoolean(!data_error, Seq("no_data_error")), +// property.CoverBoolean(data_error && !data_error_uncorrectable, Seq("data_correctable_error")), +// property.CoverBoolean(data_error && data_error_uncorrectable, Seq("data_uncorrectable_error")) +// ) +// val request_source = Seq( +// property.CoverBoolean(s2_isSlavePortAccess, Seq("from_TL")), +// property.CoverBoolean(!s2_isSlavePortAccess, Seq("from_CPU")) +// ) +// +// property.cover( +// new property.CrossProperty( +// Seq(data_error_cover, request_source), +// Seq(), +// "MemorySystem;;Scratchpad Memory Bit Flip Cross Covers" +// ) +// ) +// } else { +// +// val data_error_type = Seq( +// property.CoverBoolean(!s2_valid_data_error, Seq("no_data_error")), +// property.CoverBoolean(s2_valid_data_error && !s2_data_error_uncorrectable, Seq("data_correctable_error")), +// property.CoverBoolean(s2_valid_data_error && s2_data_error_uncorrectable, Seq("data_uncorrectable_error")) +// ) +// val data_error_dirty = Seq( +// property.CoverBoolean(!s2_victim_dirty, Seq("data_clean")), +// property.CoverBoolean(s2_victim_dirty, Seq("data_dirty")) +// ) +// val request_source = if (supports_flush) { +// Seq(property.CoverBoolean(!flushing, Seq("access")), property.CoverBoolean(flushing, Seq("during_flush"))) +// } else { +// Seq(property.CoverBoolean(true.B, Seq("never_flush"))) +// } +// val tag_error_cover = Seq( +// property.CoverBoolean(!s2_meta_error, Seq("no_tag_error")), +// property.CoverBoolean(s2_meta_error && !s2_meta_error_uncorrectable, Seq("tag_correctable_error")), +// property.CoverBoolean(s2_meta_error && s2_meta_error_uncorrectable, Seq("tag_uncorrectable_error")) +// ) +// property.cover( +// new property.CrossProperty( +// Seq(data_error_type, data_error_dirty, request_source, tag_error_cover), +// Seq(), +// "MemorySystem;;Cache Memory Bit Flip Cross Covers" +// ) +// ) +// } + + } // leaving gated-clock domain + val dcacheImpl = withClock(gated_clock) { new DCacheModuleImpl } + + def encodeData(x: UInt, poison: Bool) = + VecInit(grouped(x, eccBits).map(dECC.encode(_, if (dECC.canDetect) poison 
else false.B))).asUInt + def dummyEncodeData(x: UInt) = VecInit(grouped(x, eccBits).map(dECC.swizzle)).asUInt + def decodeData(x: UInt) = grouped(x, dECC.width(eccBits)).map(dECC.decode) + def eccMask(byteMask: UInt) = VecInit(grouped(byteMask, eccBytes).map(_.orR)).asUInt + def eccByteMask(byteMask: UInt) = FillInterleaved(eccBytes, eccMask(byteMask)) + + def likelyNeedsRead(req: HellaCacheReq): Bool = { + // req.cmd.isOneOf(M_XWR, M_PFW) + val res = !Seq(M_XWR, M_PFW).map(_ === req.cmd).reduce(_ ||_) || req.size < log2Ceil(eccBytes).U + assert(!needsRead(req) || res) + res + } + + def isRead(cmd: UInt) = Seq(M_XRD, M_XLR, M_XSC).map(_ === cmd).reduce(_ || _) + def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC + def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR + + def needsRead(req: HellaCacheReq) = + isRead(req.cmd) || + (isWrite(req.cmd) && (req.cmd === M_PWR || req.size < log2Ceil(eccBytes).U)) + + def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = {} + def ccoverNotScratchpad(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = {} + + require( + !usingVM || tagLSB <= pgIdxBits, + s"D$$ set size must not exceed ${1 << (pgIdxBits - 10)} KiB; got ${(nSets * cacheBlockBytes) >> 10} KiB" + ) + def tagLSB: Int = untagBits + def probeIdx(b: UInt): UInt = b(idxMSB, idxLSB) +} + +class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) { + val size = Wire(UInt(log2Ceil(log2Ceil(maxSize) + 1).W)) + size := typ + def misaligned: Bool = + (addr & ((1.U << size) - 1.U)(log2Ceil(maxSize) - 1, 0)).orR + + def mask = { + var res = 1.U + for (i <- 0 until log2Ceil(maxSize)) { + val upper = Mux(addr(i), res, 0.U) | Mux(size >= (i + 1).U, ((BigInt(1) << (1 << i)) - 1).U, 0.U) + val lower = Mux(addr(i), 0.U, res) + res = Cat(upper, lower) + } + res + } + + protected def genData(i: Int): UInt = + if (i >= log2Ceil(maxSize)) dat + else Mux(size === i.U, Fill(1 << (log2Ceil(maxSize) - i), dat((8 << i) - 1, 0)), genData(i + 1)) + + def data = genData(0) + def wordData = genData(2) +} + +class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) { + private val size = new StoreGen(typ, addr, dat, maxSize).size + + private def genData(logMinSize: Int): UInt = { + var res = dat + for (i <- log2Ceil(maxSize) - 1 to logMinSize by -1) { + val pos = 8 << i + val shifted = Mux(addr(i), res(2 * pos - 1, pos), res(pos - 1, 0)) + val doZero = (i == 0).B && zero + val zeroed = Mux(doZero, 0.U, shifted) + res = Cat( + Mux(size === i.U || doZero, Fill(8 * maxSize - pos, signed && zeroed(pos - 1)), res(8 * maxSize - 1, pos)), + zeroed + ) + } + res + } + + def wordData = genData(2) + def data = genData(0) +} diff --git a/rocketv/src/HellaCacheArbiter.scala b/rocketv/src/HellaCacheArbiter.scala new file mode 100644 index 000000000..5862b887d --- /dev/null +++ b/rocketv/src/HellaCacheArbiter.scala @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +// TODO: inline and remove this Module +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, log2Ceil} + +case class HellaCacheArbiterParameter(useAsyncReset: Boolean, + xLen: Int, + fLen: Int, + 
paddrBits: Int, + cacheBlockBytes: Int, + dcacheNSets: Int, + usingVM: Boolean, + separateUncachedResp: Boolean + ) extends SerializableModuleParameter { + def lgCacheBlockBytes: Int = log2Ceil(cacheBlockBytes) + + def blockOffBits: Int = lgCacheBlockBytes + + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + + def idxBits: Int = log2Ceil(dcacheNSets) + + def dcacheArbPorts: Int = 2 + + def untagBits: Int = blockOffBits + idxBits + + def pgIdxBits: Int = 12 + + def coreDataBits: Int = xLen max fLen + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def coreDataBytes: Int = coreDataBits / 8 + + def vpnBits: Int = vaddrBits - pgIdxBits + + def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) (if (usingHypervisor) 1 else 0) + 1 else 0) + + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + + def maxHypervisorExtraAddrBits: Int = 2 + + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 + + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1) min xLen + } + + // static for now + def dcacheReqTagBits: Int = 6 + + def usingHypervisor = false +} + +class HellaCacheArbiterInterface(parameter: HellaCacheArbiterParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val requestor = Flipped(Vec(parameter.dcacheArbPorts, new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ))) + val mem = new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ) +} + +@instantiable +class HellaCacheArbiter(val parameter: HellaCacheArbiterParameter) + extends FixedIORawModule(new HellaCacheArbiterInterface(parameter)) + with SerializableModule[HellaCacheArbiterParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val n = parameter.dcacheArbPorts + + if (n == 1) { + io.mem <> io.requestor.head + } else { + val s1_id = Reg(UInt()) + val s2_id = RegNext(s1_id) + + io.mem.keep_clock_enabled := io.requestor.map(_.keep_clock_enabled).reduce(_ || _) + + io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_ || _) + io.requestor(0).req.ready := io.mem.req.ready + for (i <- 1 until n) + io.requestor(i).req.ready := io.requestor(i - 1).req.ready && !io.requestor(i - 1).req.valid + + for (i <- n - 1 to 0 by -1) { + val req = io.requestor(i).req + def connect_s0() = { + io.mem.req.bits := req.bits + io.mem.req.bits.tag := Cat(req.bits.tag, i.U(log2Ceil(n).W)) + s1_id := i.U + } + def connect_s1() = { + io.mem.s1_kill := io.requestor(i).s1_kill + io.mem.s1_data := io.requestor(i).s1_data 
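+        // Added commentary: s1/s2 inputs arrive one and two cycles after the
+        // granted request, so they are steered by the s1_id/s2_id registers
+        // rather than by req.valid. E.g. with n = 2, a cycle-0 grant to port 1
+        // sets s1_id := 1, and port 1's s1_kill/s1_data are forwarded in cycle 1.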
+ } + def connect_s2() = { + io.mem.s2_kill := io.requestor(i).s2_kill + } + + if (i == n - 1) { + connect_s0() + connect_s1() + connect_s2() + } else { + when(req.valid) { connect_s0() } + when(s1_id === i.U) { connect_s1() } + when(s2_id === i.U) { connect_s2() } + } + } + + io.mem.uncached_resp.foreach(_.ready := false.B) + + for (i <- 0 until n) { + val resp = io.requestor(i).resp + val tag_hit = io.mem.resp.bits.tag(log2Ceil(n) - 1, 0) === i.U + resp.valid := io.mem.resp.valid && tag_hit + io.requestor(i).s2_xcpt := io.mem.s2_xcpt + io.requestor(i).s2_gpa := io.mem.s2_gpa + io.requestor(i).s2_gpa_is_pte := io.mem.s2_gpa_is_pte + io.requestor(i).ordered := io.mem.ordered + io.requestor(i).perf := io.mem.perf + io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === i.U + io.requestor(i).s2_nack_cause_raw := io.mem.s2_nack_cause_raw + io.requestor(i).s2_uncached := io.mem.s2_uncached + io.requestor(i).s2_paddr := io.mem.s2_paddr + io.requestor(i).clock_enabled := io.mem.clock_enabled + resp.bits := io.mem.resp.bits + resp.bits.tag := io.mem.resp.bits.tag >> log2Ceil(n) + + io.requestor(i).replay_next := io.mem.replay_next + + io.requestor(i).uncached_resp.foreach { uncached_resp => + val uncached_tag_hit = io.mem.uncached_resp.get.bits.tag(log2Ceil(n) - 1, 0) === i.U + uncached_resp.valid := io.mem.uncached_resp.get.valid && uncached_tag_hit + when(uncached_resp.ready && uncached_tag_hit) { + io.mem.uncached_resp.get.ready := true.B + } + uncached_resp.bits := io.mem.uncached_resp.get.bits + uncached_resp.bits.tag := io.mem.uncached_resp.get.bits.tag >> log2Ceil(n) + } + } + } +} diff --git a/rocketv/src/IBuf.scala b/rocketv/src/IBuf.scala new file mode 100644 index 000000000..781d2a94e --- /dev/null +++ b/rocketv/src/IBuf.scala @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +object IBufParameter { + implicit def rwP: upickle.default.ReadWriter[IBufParameter] = upickle.default.macroRW[IBufParameter] +} + +case class IBufParameter( + useAsyncReset: Boolean, + xLen: Int, + usingCompressed: Boolean, + vaddrBits: Int, + entries: Int, + // TODO: have a better way to calculate it, like what we did in the CSR... + vaddrBitsExtended: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int], + fetchWidth: Int + ) extends SerializableModuleParameter { + val retireWidth: Int = 1 + val coreInstBits: Int = if (usingCompressed) 16 else 32 + val coreInstBytes: Int = coreInstBits / 8 +} + +class IBufInterface(parameter: IBufParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val imem = Flipped( + Decoupled( + new FrontendResp( + parameter.vaddrBits, + parameter.entries, + parameter.bhtHistoryLength, + parameter.bhtCounterLength, + parameter.vaddrBitsExtended, + parameter.coreInstBits, + parameter.fetchWidth + ) + ) + ) + val kill = Input(Bool()) + val pc = Output(UInt(parameter.vaddrBitsExtended.W)) + val btb_resp = Output( + new BTBResp( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtHistoryLength, + parameter.bhtCounterLength + ) + ) + // 4. Give out the instruction to Decode. 
+ val inst = Vec(parameter.retireWidth, Decoupled(new Instruction)) +} + +@instantiable +class IBuf(val parameter: IBufParameter) + extends FixedIORawModule(new IBufInterface(parameter)) + with SerializableModule[IBufParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val xLen = parameter.xLen + val fetchWidth = parameter.fetchWidth + val vaddrBits = parameter.vaddrBits + val entries = parameter.entries + val bhtHistoryLength = parameter.bhtHistoryLength + val bhtCounterLength = parameter.bhtCounterLength + val coreInstBytes = parameter.coreInstBytes + val vaddrBitsExtended = parameter.vaddrBitsExtended + val coreInstBits = parameter.coreInstBits + val retireWidth = parameter.retireWidth + val usingCompressed = parameter.usingCompressed + + val n = fetchWidth - 1 + val nBufValid = if (n == 0) 0.U else RegInit(init = 0.U(log2Ceil(fetchWidth).W)) + val buf = Reg(chiselTypeOf(io.imem.bits)) + val ibufBTBResp = Reg( + new BTBResp( + vaddrBits, + entries, + fetchWidth, + bhtHistoryLength, + bhtCounterLength + ) + ) + val pcWordMask = (coreInstBytes * fetchWidth - 1).U(vaddrBitsExtended.W) + val pcWordBits = io.imem.bits.pc(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes)) + val nReady = WireDefault(0.U(log2Ceil(fetchWidth + 1).W)) + val nIC = Mux(io.imem.bits.btb.taken, io.imem.bits.btb.bridx +& 1.U, fetchWidth.U) - pcWordBits + val nICReady = nReady - nBufValid + val nValid = Mux(io.imem.valid, nIC, 0.U) + nBufValid + io.imem.ready := io.inst(0).ready && nReady >= nBufValid && (nICReady >= nIC || n.U >= nIC - nICReady) + + if (n > 0) { + when(io.inst(0).ready) { + nBufValid := Mux((nReady >= nBufValid) || nBufValid === 0.U, 0.U, nBufValid - nReady) + if (n > 1) when(nReady > 0.U && nReady < nBufValid) { + val shiftedBuf = + shiftInsnRight(buf.data(n * coreInstBits - 1, coreInstBits), (nReady - 1.U)(log2Ceil(n - 1) - 1, 0)) + buf.data := Cat( + buf.data(n * coreInstBits - 1, (n - 1) * coreInstBits), + shiftedBuf((n - 1) * coreInstBits - 1, 0) + ) + buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask + } + when(io.imem.valid && nReady >= nBufValid && nICReady < nIC && n.U >= nIC - nICReady) { + val shamt = pcWordBits + nICReady + nBufValid := nIC - nICReady + buf := io.imem.bits + buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n * coreInstBits - 1, 0) + buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask + ibufBTBResp := io.imem.bits.btb + } + } + when(io.kill) { + nBufValid := 0.U + } + } + + val icShiftAmt = (fetchWidth.U + nBufValid - pcWordBits)(log2Ceil(fetchWidth), 0) + val icData = + shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits - 1, 0))), icShiftAmt)( + 3 * fetchWidth * coreInstBits - 1, + 2 * fetchWidth * coreInstBits + ) + val icMask = + (~0.U((fetchWidth * coreInstBits).W) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth * coreInstBits - 1, 0) + val inst = icData & icMask | buf.data & ~icMask + + val valid = (UIntToOH(nValid) - 1.U)(fetchWidth - 1, 0) + val bufMask = UIntToOH(nBufValid) - 1.U + val xcpt = (0 until bufMask.getWidth).map(i => Mux(bufMask(i), buf.xcpt, io.imem.bits.xcpt)) + val buf_replay = Mux(buf.replay, bufMask, 0.U) + val ic_replay = buf_replay | Mux(io.imem.bits.replay, valid & ~bufMask, 0.U) + assert(!io.imem.valid || !io.imem.bits.btb.taken || io.imem.bits.btb.bridx >= pcWordBits) + + 
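+  // Worked example (added, assuming usingCompressed with fetchWidth = 2 and
+  // coreInstBits = 16): the merge above operates on 16-bit slots. With
+  // nBufValid = 1:
+  //   bufMask = 0b01, icMask = 0xffff0000
+  //   inst = icData & 0xffff0000 | buf.data & 0x0000ffff
+  // i.e. slot 0 is replayed from the buffer, slot 1 comes from the new fetch
+  // group, and valid = UIntToOH(nValid) - 1 marks the slots holding real
+  // halfwords.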
io.btb_resp := io.imem.bits.btb + io.pc := Mux(nBufValid > 0.U, buf.pc, io.imem.bits.pc) + expand(0, 0.U, inst) + + def expand(i: Int, j: UInt, curInst: UInt): Unit = if (i < retireWidth) { + // TODO: Dont instantiate it unless usingCompressed is true + val exp = Instantiate(new RVCExpander(RVCExpanderParameter(xLen, usingCompressed))) + exp.io.in := curInst + io.inst(i).bits.inst := exp.io.out + io.inst(i).bits.raw := curInst + + if (usingCompressed) { + val replay = ic_replay(j) || (!exp.io.rvc && ic_replay(j + 1.U)) + val full_insn = exp.io.rvc || valid(j + 1.U) || buf_replay(j) + io.inst(i).valid := valid(j) && full_insn + io.inst(i).bits.xcpt0 := VecInit(xcpt)(j) + io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, VecInit(xcpt)(j + 1.U).asUInt).asTypeOf(new FrontendExceptions) + io.inst(i).bits.replay := replay + io.inst(i).bits.rvc := exp.io.rvc + + when((bufMask(j) && exp.io.rvc) || bufMask(j + 1.U)) { io.btb_resp := ibufBTBResp } + + when(full_insn && ((i == 0).B || io.inst(i).ready)) { nReady := Mux(exp.io.rvc, j + 1.U, j + 2.U) } + + expand(i + 1, Mux(exp.io.rvc, j + 1.U, j + 2.U), Mux(exp.io.rvc, curInst >> 16, curInst >> 32)) + } else { + when((i == 0).B || io.inst(i).ready) { nReady := (i + 1).U } + io.inst(i).valid := valid(i) + io.inst(i).bits.xcpt0 := xcpt(i) + io.inst(i).bits.xcpt1 := 0.U.asTypeOf(new FrontendExceptions) + io.inst(i).bits.replay := ic_replay(i) + io.inst(i).bits.rvc := false.B + + expand(i + 1, null, curInst >> 32) + } + } + + def shiftInsnLeft(in: UInt, dist: UInt): UInt = { + val r = in.getWidth / coreInstBits + require(in.getWidth % coreInstBits == 0) + val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r - 1) * coreInstBits), in) + data << (dist << log2Ceil(coreInstBits)) + } + + def shiftInsnRight(in: UInt, dist: UInt): UInt = { + val r = in.getWidth / coreInstBits + require(in.getWidth % coreInstBits == 0) + val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r - 1) * coreInstBits), in) + data >> (dist << log2Ceil(coreInstBits)) + } +} diff --git a/rocketv/src/ICache.scala b/rocketv/src/ICache.scala new file mode 100644 index 000000000..ca508f513 --- /dev/null +++ b/rocketv/src/ICache.scala @@ -0,0 +1,930 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.random.LFSR +import chisel3.util._ +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} + +case class ICacheParameter(useAsyncReset: Boolean, + prefetch: Boolean, + nSets: Int, + nWays: Int, + blockBytes: Int, + usingVM: Boolean, + vaddrBits: Int, + paddrBits: Int + ) extends SerializableModuleParameter { + // static for now + val latency: Int = 2 + val itimAXIParameter: Option[AXI4BundleParameter] = None + val itimBaseAddr: Option[BigInt] = None + val tagECC: Option[String] = None + val dataECC: Option[String] = None + // calculated + // todo: param? 
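+  // Worked example (added): with nSets = 64, blockBytes = 64, paddrBits = 32,
+  // the derived geometry below is
+  //   blockOffBits = 6, idxBits = 6, untagBits = 12 == pgIdxBits,
+  //   pgUntagBits = 12 (usingVM), tagBits = 32 - 12 = 20,
+  // so the VIPT index uses only untranslated page-offset bits and no aliasing
+  // can occur.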
+  val fetchBytes: Int = 4
+  val usingITIM: Boolean = itimAXIParameter.isDefined
+  val tagCode: Code = Code.fromString(tagECC)
+  val dataCode: Code = Code.fromString(dataECC)
+  // (cacheParams.tagCode.canDetect || cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W)))
+  val hasCorrectable: Boolean = tagCode.canDetect || dataCode.canDetect
+  // (cacheParams.itimAddr.nonEmpty && cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W)))
+  val hasUncorrectable: Boolean = itimBaseAddr.nonEmpty && dataCode.canDetect
+  val isDM: Boolean = nWays == 1
+  // axi data width
+  val rowBits: Int = blockBytes * 8
+  val refillCycles: Int = blockBytes * 8 / rowBits
+  val blockOffBits: Int = log2Up(blockBytes)
+  val idxBits: Int = log2Up(nSets)
+  val pgIdxBits: Int = 12
+  val untagBits: Int = blockOffBits + idxBits
+  val pgUntagBits: Int = if (usingVM) untagBits min pgIdxBits else untagBits
+  val tagBits: Int = paddrBits - pgUntagBits
+  val instructionFetchParameter: AXI4BundleParameter = AXI4BundleParameter(
+    idWidth = 1,
+    dataWidth = rowBits,
+    addrWidth = paddrBits,
+    userReqWidth = 0,
+    userDataWidth = 0,
+    userRespWidth = 0,
+    hasAW = false,
+    hasW = false,
+    hasB = false,
+    hasAR = true,
+    hasR = true,
+    supportId = true,
+    supportRegion = false,
+    supportLen = true,
+    supportSize = true,
+    supportBurst = true,
+    supportLock = false,
+    supportCache = false,
+    supportQos = false,
+    supportStrb = false,
+    supportResp = false,
+    supportProt = false
+  )
+}
+
+object ICacheParameter {
+  implicit def rwP: upickle.default.ReadWriter[ICacheParameter] = upickle.default.macroRW[ICacheParameter]
+}
+
+class ICacheInterface(parameter: ICacheParameter) extends Bundle {
+  val clock = Input(Clock())
+  val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool())
+  /** first cycle, requested from CPU. */
+  val req = Flipped(Decoupled(new ICacheReq(parameter.vaddrBits)))
+  /** from TLB. */
+  val s1_paddr = Input(UInt(parameter.paddrBits.W))
+  /** from frontend, piped from s0. */
+  val s2_vaddr = Input(UInt(parameter.vaddrBits.W))
+  /** - instruction jumps away (at S2).
+    * - if TLB is not valid, kill it.
+    * - S2 replay
+    */
+  val s1_kill = Input(Bool())
+  /** @todo does s2_kill only kill refill?
+    * - S2 speculative access (refill?) cannot access a non-cacheable address? why?
+    * - S2 exception (PF, AF)
+    */
+  val s2_kill = Input(Bool()) // delayed two cycles; prevents I$ miss emission
+  /** should L2 cache the line on a miss? */
+  val s2_cacheable = Input(Bool())
+  /** should I$ prefetch the next line on a miss? */
+  val s2_prefetch = Input(Bool())
+  /** response to CPU. */
+  val resp = Valid(new ICacheResp(parameter.fetchBytes))
+
+  /** flush L1 cache from CPU, triggered by FENCE.I. */
+  val invalidate = Input(Bool())
+
+  /** I$ has an error; notify the bus. */
+  val errors = new ICacheErrors(parameter.hasCorrectable, parameter.hasUncorrectable, parameter.paddrBits)
+
+  /** for performance counting. */
+  val perf = Output(new ICachePerfEvents)
+
+  /** enable clock. */
+  val clock_enabled = Input(Bool())
+
+  /** I$ miss or ITIM access will still enable clock even [[ICache]] is asked to be gated.
*/ + val keep_clock_enabled = Output(Bool()) + + val instructionFetchAXI: AXI4ROIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + + val itimAXI: Option[AXI4RWIrrevocable] = parameter.itimAXIParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) +} + +@instantiable +class ICache(val parameter: ICacheParameter) + extends FixedIORawModule(new ICacheInterface(parameter)) + with SerializableModule[ICacheParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + // compatiblity mode + object Split { + def apply(x: UInt, n0: Int) = { + val w = x.getWidth + (x(w-1,n0), x(n0-1,0)) + } + def apply(x: UInt, n1: Int, n0: Int) = { + val w = x.getWidth + (x(w-1,n1), x(n1-1,n0), x(n0-1,0)) + } + def apply(x: UInt, n2: Int, n1: Int, n0: Int) = { + val w = x.getWidth + (x(w-1,n2), x(n2-1,n1), x(n1-1,n0), x(n0-1,0)) + } + } + + val usingVM = parameter.usingVM + val refillCycles = parameter.refillCycles + val pgIdxBits = parameter.pgIdxBits + val untagBits = parameter.untagBits + val nWays = parameter.nWays + val nSets = parameter.nSets + val blockOffBits = parameter.blockOffBits + val idxBits = parameter.idxBits + val pgUntagBits = parameter.pgUntagBits + val tagBits = parameter.tagBits + val isDM = parameter.isDM + object outer { + val size = parameter.nSets * parameter.nWays * parameter.blockBytes + object icacheParams { + val fetchBytes = parameter.fetchBytes + val latency = parameter.latency + } + } + object cacheParams { + val prefetch = parameter.prefetch + } + // end + + // TODO: move ecc + val tECC: Code = parameter.tagCode + val dECC: Code = parameter.dataCode + + require(isPow2(parameter.nSets) && isPow2(parameter.nWays)) + require( + !usingVM || parameter.usingITIM || pgIdxBits >= untagBits, + s"When VM and ITIM are enabled, I$$ set size must not exceed ${1 << (pgIdxBits - 10)} KiB; got ${(outer.size / nWays) >> 10} KiB" + ) + + /** register indicates wheather ITIM is enabled. */ + val scratchpadOn = RegInit(false.B) + + /** a cut point to SRAM, indicates which SRAM will be used as SRAM or Cache. */ + val scratchpadMax = Option.when(parameter.usingITIM)(Reg(UInt(log2Ceil(nSets * (nWays - 1)).W))) + + /** Check if a line is in the scratchpad. + * + * line is a minimal granularity accessing to SRAM, calculated by [[scratchpadLine]] + */ + def lineInScratchpad(line: UInt) = scratchpadMax.map(scratchpadOn && line <= _).getOrElse(false.B) + + /** scratchpad base address, if exist [[ICacheParams.itimAddr]], add [[ReplicatedRegion]] to base. + * @todo seem [[io_hartid]] is not connected? + * maybe when implementing itim, LookupByHartId should be changed to [[]]? + * should be a Int + */ + val scratchpadBase: Option[UInt] = None + + /** check an address in the scratchpad address range. */ + def addrMaybeInScratchpad(addr: UInt) = + scratchpadBase.map(base => addr >= base && addr < base + outer.size.U).getOrElse(false.B) + + /** check property this address(paddr) exists in scratchpad. + * @todo seems duplicated in `addrMaybeInScratchpad(addr)` between `lineInScratchpad(addr(untagBits+log2Ceil(nWays)-1, blockOffBits))`? 
+ */ + def addrInScratchpad(addr: UInt) = + addrMaybeInScratchpad(addr) && lineInScratchpad(addr(untagBits + log2Ceil(nWays) - 1, blockOffBits)) + + /** return the way which will be used as scratchpad for accessing address + * {{{ + * │ tag │ set │offset│ + * └way┘ + * }}} + * @param addr address to be found. + */ + def scratchpadWay(addr: UInt) = addr(untagBits + log2Ceil(nWays) - 1, untagBits) + + /** check if the selected way is legal. + * note: the last way should be reserved to ICache. + */ + def scratchpadWayValid(way: UInt) = way < (nWays - 1).U + + /** return the cacheline which will be used as scratchpad for accessing address + * {{{ + * │ tag │ set │offset│ + * ├way┘ → indicate way location + * │ line │ + * }}} + * @param addr address to be found. + * applied to slave_addr + */ + def scratchpadLine(addr: UInt) = addr(untagBits + log2Ceil(nWays) - 1, blockOffBits) + + /** scratchpad access valid in stage N */ + val s0_slaveValid = io.itimAXI.map(axi => axi.w.fire || axi.ar.fire).getOrElse(false.B) + val s0_slaveWriteValid = io.itimAXI.map(axi => axi.w.fire).getOrElse(false.B) + + val s1_slaveValid = RegNext(s0_slaveValid, false.B) + val s1_slaveWriteValid = RegNext(s0_slaveWriteValid, false.B) + val s2_slaveValid = RegNext(s1_slaveValid, false.B) + val s2_slaveWriteValid = RegNext(s1_slaveWriteValid, false.B) + val s3_slaveValid = RegNext(false.B) + + /** valid signal for CPU accessing cache in stage 0. */ + val s0_valid = io.req.fire + + /** virtual address from CPU in stage 0. */ + val s0_vaddr = io.req.bits.addr + + /** valid signal for stage 1, drived by s0_valid. */ + val s1_valid = RegInit(false.B) + + /** virtual address from CPU in stage 1. */ + val s1_vaddr = RegEnable(s0_vaddr, s0_valid) + + /** tag hit vector to indicate hit which way. */ + val s1_tag_hit = Wire(Vec(nWays, Bool())) + + /** CPU I$ Hit in stage 1. + * + * @note + * for logic in `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`, + * there are two different types based on latency: + * + * if latency is 1: `s1_slaveValid === false.B` and `addrMaybeInScratchpad(io.s1_paddr) === false.B` , + * since in this case, ITIM must be empty. + * + * if latency is 2: if `s1_slaveValid` is true, this SRAM accessing is coming from [[tl_in]], so it will hit. + * if `s1_slaveValid` is false, but CPU is accessing memory range in scratchpad address, it will hit by default. + * Hardware won't guarantee this access will access to a data which have been written in ITIM. + * + * @todo seem CPU access are both processed by `s1_tag_hit` and `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`? + */ + val s1_hit = s1_tag_hit.reduce(_ || _) || Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr)) + dontTouch(s1_hit) + val s2_valid = RegNext(s1_valid && !io.s1_kill, false.B) + val s2_hit = RegNext(s1_hit) + + /** status register to indicate a cache flush. */ + val invalidated = Reg(Bool()) + val refill_valid = RegInit(false.B) + + /** register to indicate [[tl_out]] is performing a hint. + * prefetch only happens after refilling + */ + val send_hint = RegInit(false.B) + + /** indicate [[tl_out]] is performing a refill. */ + // val refill_fire = tl_out.a.fire && !send_hint + val refill_fire = io.instructionFetchAXI.ar.fire && !send_hint + + /** register to indicate there is a outstanding hint. */ + val hint_outstanding = RegInit(false.B) + + /** [[io]] access L1 I$ miss. */ + val s2_miss = s2_valid && !s2_hit && !io.s2_kill + + /** forward signal to stage 1, permit stage 1 refill. 
*/ + val s1_can_request_refill = !(s2_miss || refill_valid) + + /** real refill signal, stage 2 miss, and was permit to refill in stage 1. + * Since a miss will trigger burst. + * miss under miss won't trigger another burst. + */ + val s2_request_refill = s2_miss && RegNext(s1_can_request_refill) + val refill_paddr = RegEnable(io.s1_paddr, s1_valid && s1_can_request_refill) + val refill_vaddr = RegEnable(s1_vaddr, s1_valid && s1_can_request_refill) + val refill_tag = refill_paddr >> pgUntagBits + val refill_idx = index(refill_vaddr, refill_paddr) + + /** AccessAckData, is refilling I$, it will block request from CPU. */ + // val refill_one_beat = tl_out.d.fire && edge_out.hasData(tl_out.d.bits) + // TODO: check hasData? + val refill_one_beat = io.instructionFetchAXI.r.fire + + /** block request from CPU when refill or scratch pad access. */ + io.req.ready := !(refill_one_beat || s0_slaveValid || s3_slaveValid) + s1_valid := s0_valid + + // val (_, _, d_done, refill_cnt) = edge_out.count(tl_out.d) + val d_done: Bool = io.instructionFetchAXI.r.valid && io.instructionFetchAXI.r.bits.last + // todo: burst index always == 0? + val refill_cnt: UInt = 0.U + + /** at last beat of `tl_out.d.fire`, finish refill. */ + val refill_done = refill_one_beat && d_done + + /** scratchpad is writing data. block refill. */ + io.instructionFetchAXI.r.ready := !s3_slaveValid + // require(edge_out.manager.minLatency > 0) + + /** way to be replaced, implemented with a hardcoded random replacement algorithm */ + val repl_way = + if (isDM) 0.U + else { + // pick a way that is not used by the scratchpad + val v0 = LFSR(16, refill_fire)(log2Up(nWays) - 1, 0) + var v = v0 + for (i <- log2Ceil(nWays) - 1 to 0 by -1) { + val mask = nWays - (BigInt(1) << (i + 1)) + v = v | (lineInScratchpad(Cat(v0 | mask.U, refill_idx)) << i) + } + assert(!lineInScratchpad(Cat(v, refill_idx))) + v + } + + /** Tag SRAM, indexed with virtual memory, + * content with `refillError ## tag[19:0]` after ECC + */ + val tag_array: SRAMInterface[Vec[UInt]] = SRAM.masked( + size = parameter.nSets, + tpe = Vec(nWays, UInt(tECC.width(1 + tagBits).W)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 + ) + + // val tag_rdata = tag_array.read(s0_vaddr(untagBits - 1, blockOffBits), !refill_done && s0_valid) + // todo: read req + val tag_rdata: Vec[UInt] = tag_array.readwritePorts.head.readData + + /** register indicates the ongoing GetAckData transaction is corrupted. */ + val accruedRefillError = Reg(Bool()) + + /** wire indicates the ongoing GetAckData transaction is corrupted. */ + // todo: tl_out.d.bits.corrupt -> false.B + val refillError: Bool = false.B || (refill_cnt > 0.U && accruedRefillError) + val enc_tag = tECC.encode(Cat(refillError, refill_tag)) + tag_array.readwritePorts.foreach {ramPort => + ramPort.enable := s0_valid || refill_done + ramPort.isWrite := refill_done + ramPort.address := Mux(refill_done, refill_idx, s0_vaddr(untagBits - 1, blockOffBits)) + ramPort.writeData := VecInit(Seq.fill(nWays) { enc_tag }) + ramPort.mask.foreach(_ := VecInit(Seq.tabulate(nWays)(repl_way === _.U))) + } + // ccover(refillError, "D_CORRUPT", "I$ D-channel corrupt") + // notify CPU, I$ has corrupt. 
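+  // Added commentary: each tag entry stores tECC.encode(refillError ## tag), so a
+  // refill that saw a bus error is poisoned in the tag array. Stage 1 recovers
+  // the pair with the matching decode (see below):
+  //   val enc = tECC.decode(tag_rdata(way)) // .error / .uncorrected
+  //   val (tlError, tag) = Split(enc.uncorrected, tagBits)
+  // and a later hit on the poisoned line reports an access exception instead of
+  // silently returning corrupt instructions.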
+ // flase.B -> (tl_out.d.bits.denied || tl_out.d.bits.corrupt) + io.errors.bus.valid := io.instructionFetchAXI.r.fire && false.B + io.errors.bus.bits := (refill_paddr >> blockOffBits) << blockOffBits + + /** true indicate this cacheline is valid, + * indexed by (wayIndex ## setIndex) + * after refill_done and not FENCE.I, (repl_way ## refill_idx) set to true. + */ + val vb_array = RegInit(0.U((nSets * nWays).W)) + when(refill_one_beat) { + accruedRefillError := refillError + // clear bit when refill starts so hit-under-miss doesn't fetch bad data + vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated) + } + + /** flush cache when invalidate is true. */ + val invalidate = WireDefault(io.invalidate) + when(invalidate) { + vb_array := 0.U + invalidated := true.B + } + + /** wire indicates that tag is correctable or uncorrectable. + * will trigger CPU to replay and I$ invalidating, if correctable. + */ + val s1_tag_disparity = Wire(Vec(nWays, Bool())) + + /** wire indicates that bus has an uncorrectable error. + * respond to CPU [[io.resp.bits.ae]], cause [[Causes.fetch_access]]. + */ + val s1_tl_error = Wire(Vec(nWays, Bool())) + + /** how many bits will be fetched by CPU for each fetch. */ + val wordBits = outer.icacheParams.fetchBytes * 8 + + /** a set of raw data read from [[data_arrays]]. */ + val s1_dout = Wire(Vec(nWays, UInt(dECC.width(wordBits).W))) + s1_dout := DontCare + + /** address accessed by [[tl_in]] for ITIM. */ + // val s0_slaveAddr = tl_in.map(_.a.bits.address).getOrElse(0.U) + val s0_slaveAddr = io.itimAXI.map(_.aw.bits.addr).getOrElse(0.U) + + /** address used at stage 1 and 3. + * {{{ + * In stage 1, it caches TileLink data, store in stage 2 if ECC passed. + * In stage 3, it caches corrected data from stage 2, and store in stage 4.}}} + */ + val s1s3_slaveAddr = Reg(UInt(log2Ceil(outer.size).W)) + + /** data used at stage 1 and 3. + * {{{ + * In stage 1, it caches TileLink data, store in stage 2. + * In stage 3, it caches corrected data from data ram, and return to d channel.}}} + */ + val s1s3_slaveData = Reg(UInt(wordBits.W)) + + for (i <- 0 until nWays) { + val s1_idx = index(s1_vaddr, io.s1_paddr) + val s1_tag = io.s1_paddr >> pgUntagBits + + /** this way is used by scratchpad. + * [[tag_array]] corrupted. + */ + val scratchpadHit = scratchpadWayValid(i.U) && + Mux( + s1_slaveValid, + // scratchpad accessing form [[tl_in]]. + // @todo I think XBar will guarantee there won't be an illegal access on the bus? + // so why did have this check `lineInScratchpad(scratchpadLine(s1s3_slaveAddr))`? + // I think it will always be true. + lineInScratchpad(scratchpadLine(s1s3_slaveAddr)) && scratchpadWay(s1s3_slaveAddr) === i.U, + // scratchpad accessing from [[io]]. + // @todo Accessing ITIM correspond address will be able to read cacheline? + // is this desired behavior? + addrInScratchpad(io.s1_paddr) && scratchpadWay(io.s1_paddr) === i.U + ) + val s1_vb = vb_array(Cat(i.U, s1_idx)) && !s1_slaveValid + val enc_tag = tECC.decode(tag_rdata(i)) + + /** [[tl_error]] ECC error bit. + * [[tag]] of [[tag_array]] access. + */ + + val (tl_error, tag) = Split(enc_tag.uncorrected, tagBits) + val tagMatch = s1_vb && tag === s1_tag + + /** tag error happens. */ + s1_tag_disparity(i) := s1_vb && enc_tag.error + + /** if tag matched but ecc checking failed, this access will trigger [[Causes.fetch_access]] exception. 
*/ + s1_tl_error(i) := tagMatch && tl_error.asBool + s1_tag_hit(i) := tagMatch || scratchpadHit + } + assert( + !(s1_valid || s1_slaveValid) || PopCount(s1_tag_hit.zip(s1_tag_disparity).map { case (h, d) => h && !d }) <= 1.U + ) + + require(io.instructionFetchAXI.r.bits.data.getWidth % wordBits == 0) + + /** Data SRAM + * + * banked with TileLink beat bytes / CPU fetch bytes, + * indexed with [[index]] and multi-beats cycle, + * content with `eccError ## wordBits` after ECC. + * {{{ + * │ │xx│xxxxxx│xxx│x│xx│ + * ↑word + * ↑bank + * ↑way + * └─set──┴─offset─┘ + * └────row───┘ + * }}} + * Note: + * Data SRAM is indexed with virtual memory(vaddr[11:2]), + * - vaddr[11:3]->row, + * - vaddr[2]->bank=i + * - Cache line size = refillCycels(8) * bank(2) * datasize(4 bytes) = 64 bytes + * - data width = 32 + * + * read: + * read happens in stage 0 + * + * write: + * It takes 8 beats to refill 16 instruction in each refilling cycle. + * Data_array receives data[63:0](2 instructions) at once,they will be allocated in deferent bank according to vaddr[2] + */ + val data_arrays: Seq[SRAMInterface[Vec[UInt]]] = Seq.tabulate(io.instructionFetchAXI.r.bits.data.getWidth / wordBits) { i => + SRAM.masked( + size = nSets * refillCycles, + tpe = Vec(nWays, UInt(dECC.width(wordBits).W)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 + ) + } + + for ((data_array, i) <- data_arrays.zipWithIndex) { + + /** bank match (vaddr[2]) */ + def wordMatch(addr: UInt): Bool = addr(log2Ceil(io.instructionFetchAXI.r.bits.data.getWidth / 8) - 1, log2Ceil(wordBits / 8)) === i.U + // TODO: if we have last? do we need refillCycles? + def row(addr: UInt) = addr(untagBits - 1, blockOffBits - log2Ceil(refillCycles)) + + /** read_enable signal */ + val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr)) + + /** write_enable signal + * refill from [[tl_out]] or ITIM write. + */ + val wen = (refill_one_beat && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr)) + + /** index to access [[data_array]]. */ + val mem_idx = + // I$ refill. refill_idx[2:0] is the beats + Mux( + refill_one_beat, + (refill_idx << log2Ceil(refillCycles)) | refill_cnt, + // ITIM write. + Mux( + s3_slaveValid, + row(s1s3_slaveAddr), + // ITIM read. + Mux( + s0_slaveValid, + row(s0_slaveAddr), + // CPU read. + row(s0_vaddr) + ) + ) + ) + val data: UInt = Mux(s3_slaveValid, s1s3_slaveData, io.instructionFetchAXI.r.bits.data(wordBits * (i + 1) - 1, wordBits * i)) + //the way to be replaced/written + val way = Mux(s3_slaveValid, scratchpadWay(s1s3_slaveAddr), repl_way) + data_array.readwritePorts.foreach { dataPort => + dataPort.enable := wen || s0_ren + dataPort.isWrite := wen + dataPort.address := mem_idx + dataPort.writeData := VecInit(Seq.fill(nWays) { dECC.encode(data) }) + dataPort.mask.foreach(_ := VecInit((0 until nWays).map(way === _.U))) + } + + // write access + /** data read from [[data_array]]. */ + val dout: Vec[UInt] = data_array.readwritePorts.head.readData + // Mux to select a way to [[s1_dout]] + when(wordMatch(Mux(s1_slaveValid, s1s3_slaveAddr, io.s1_paddr))) { + s1_dout := dout + } + } + + /** When writing full words to ITIM, ECC errors are correctable. + * When writing a full scratchpad word, suppress the read so Xs don't leak out + */ + val s1s2_full_word_write = WireDefault(false.B) + val s1_dont_read = s1_slaveValid && s1s2_full_word_write + + /** clock gate signal for [[s2_tag_hit]], [[s2_dout]], [[s2_tag_disparity]], [[s2_tl_error]], [[s2_scratchpad_hit]]. 
*/ + val s1_clk_en = s1_valid || s1_slaveValid + val s2_tag_hit = RegEnable(Mux(s1_dont_read, 0.U.asTypeOf(s1_tag_hit), s1_tag_hit), s1_clk_en) + + /** way index to access [[data_arrays]]. */ + val s2_hit_way = OHToUInt(s2_tag_hit) + + /** ITIM index to access [[data_arrays]]. + * replace tag with way, word set to 0. + */ + val s2_scratchpad_word_addr = Cat( + s2_hit_way, + Mux(s2_slaveValid, s1s3_slaveAddr, io.s2_vaddr)(untagBits - 1, log2Ceil(wordBits / 8)), + 0.U(log2Ceil(wordBits / 8).W) + ) + val s2_dout = RegEnable(s1_dout, s1_clk_en) + val s2_way_mux = Mux1H(s2_tag_hit, s2_dout) + val s2_tag_disparity = RegEnable(s1_tag_disparity, s1_clk_en).asUInt.orR + val s2_tl_error = RegEnable(s1_tl_error.asUInt.orR, s1_clk_en) + + /** ECC decode result for [[data_arrays]]. */ + val s2_data_decoded = dECC.decode(s2_way_mux) + + /** ECC error happened, correctable or uncorrectable, ask CPU to replay. */ + val s2_disparity = s2_tag_disparity || s2_data_decoded.error + + /** access hit in ITIM, if [[s1_slaveValid]], this access is from [[tl_in]], else from CPU [[io]]. */ + val s1_scratchpad_hit = + Mux(s1_slaveValid, lineInScratchpad(scratchpadLine(s1s3_slaveAddr)), addrInScratchpad(io.s1_paddr)) + + /** stage 2 of [[s1_scratchpad_hit]]. */ + val s2_scratchpad_hit = RegEnable(s1_scratchpad_hit, s1_clk_en) + + /** ITIM uncorrectable read. + * `s2_scratchpad_hit`: processing a scratchpad read(from [[tl_in]] or [[io]]) + * `s2_data_decoded.uncorrectable`: read a uncorrectable data. + * `s2_valid`: [[io]] non-canceled read. + * `(s2_slaveValid && !s2_full_word_write)`: [[tl_in]] read or write a word with wormhole. + * if write a full word, even stage 2 read uncorrectable. + * stage 3 full word write will recovery this. + */ + val s2_report_uncorrectable_error = + s2_scratchpad_hit && s2_data_decoded.uncorrectable && (s2_valid || (s2_slaveValid && !s1s2_full_word_write)) + + /** ECC uncorrectable address, send to Bus Error Unit. */ + val s2_error_addr = + scratchpadBase.map(base => Mux(s2_scratchpad_hit, base + s2_scratchpad_word_addr, 0.U)).getOrElse(0.U) + + // output signals + outer.icacheParams.latency match { + // if I$ latency is 1, no ITIM, no ECC. + case 1 => + require(tECC.isInstanceOf[IdentityCode]) + require(dECC.isInstanceOf[IdentityCode]) + require(parameter.itimAXIParameter.isEmpty) + // reply data to CPU at stage 2. no replay. + io.resp.bits.data := Mux1H(s1_tag_hit, s1_dout) + io.resp.bits.ae := s1_tl_error.asUInt.orR + io.resp.valid := s1_valid && s1_hit + io.resp.bits.replay := false.B + + // if I$ latency is 2, can have ITIM and ECC. + case 2 => + // when some sort of memory bit error have occurred + // @todo why so aggressive to invalidate all when ecc corrupted. + when(s2_valid && s2_disparity) { invalidate := true.B } + + // reply data to CPU at stage 2. + io.resp.bits.data := s2_data_decoded.uncorrected + io.resp.bits.ae := s2_tl_error + io.resp.bits.replay := s2_disparity + io.resp.valid := s2_valid && s2_hit + + // report correctable error to BEU at stage 2. + io.errors.correctable.foreach { c => + c.valid := (s2_valid || s2_slaveValid) && s2_disparity && !s2_report_uncorrectable_error + c.bits := s2_error_addr + } + // report uncorrectable error to BEU at stage 2. + io.errors.uncorrectable.foreach { u => + u.valid := s2_report_uncorrectable_error + u.bits := s2_error_addr + } + + // ITIM access + io.itimAXI.foreach { axi => + /** valid signal for D channel. 
*/ + val respValid = RegInit(false.B) + // ITIM access is unpipelined + axi.ar.ready := !(io.instructionFetchAXI.r.valid || s1_slaveValid || s2_slaveValid || s3_slaveValid || respValid || !io.clock_enabled) + /** register used to latch TileLink request for one cycle. */ + val s1_a = RegEnable(axi.ar.bits, s0_slaveValid) + val s1_aw = RegEnable(axi.aw.bits, axi.aw.fire) + val s1_w = RegEnable(axi.w.bits, axi.w.fire) + // Write Data(Put / PutPartial all mask is 1) + s1s2_full_word_write := axi.w.bits.strb.andR + // (de)allocate ITIM + when(axi.w.fire) { + // address + s1s3_slaveAddr := s1_aw.addr + // store Put/PutP data + s1s3_slaveData := axi.w.bits.data + // S0 + // access data in 0 -> way - 2 allocate and enable, access data in way - 1(last way), deallocate. + val enable = scratchpadWayValid(scratchpadWay(s1_aw.addr)) + //The address isn't in range, + when(!lineInScratchpad(scratchpadLine(s1_aw.addr))) { + scratchpadMax.get := scratchpadLine(s1_aw.addr) + invalidate := true.B + } + scratchpadOn := enable + // val itim_allocated = !scratchpadOn && enable + // val itim_deallocated = scratchpadOn && !enable + // val itim_increase = scratchpadOn && enable && scratchpadLine(a.address) > scratchpadMax.get + // val refilling = refill_valid && refill_cnt > 0.U + // ccover(itim_allocated, "ITIM_ALLOCATE", "ITIM allocated") + // ccover(itim_allocated && refilling, "ITIM_ALLOCATE_WHILE_REFILL", "ITIM allocated while I$ refill") + // ccover(itim_deallocated, "ITIM_DEALLOCATE", "ITIM deallocated") + // ccover(itim_deallocated && refilling, "ITIM_DEALLOCATE_WHILE_REFILL", "ITIM deallocated while I$ refill") + // ccover(itim_increase, "ITIM_SIZE_INCREASE", "ITIM size increased") + // ccover(itim_increase && refilling, "ITIM_SIZE_INCREASE_WHILE_REFILL", "ITIM size increased while I$ refill") + } + + assert(!s2_valid || RegNext(RegNext(s0_vaddr)) === io.s2_vaddr) + when( + !(axi.w.valid || s1_slaveValid || s2_slaveValid || respValid) + && s2_valid && s2_data_decoded.error && !s2_tag_disparity + ) { + // handle correctable errors on CPU accesses to the scratchpad. + // if there is an in-flight slave-port access to the scratchpad, + // report the miss but don't correct the error (as there is + // a structural hazard on s1s3_slaveData/s1s3_slaveAddress). + s3_slaveValid := true.B + s1s3_slaveData := s2_data_decoded.corrected + s1s3_slaveAddr := s2_scratchpad_word_addr | s1s3_slaveAddr(log2Ceil(wordBits / 8) - 1, 0) + } + + // back pressure is allowed on the [[tl]] + // pull up [[respValid]] when [[s2_slaveValid]] until [[tl.d.fire]] + respValid := s2_slaveValid || (respValid && !axi.r.ready) + // if [[s2_full_word_write]] will overwrite data, and [[s2_data_decoded.uncorrectable]] can be ignored. + val respError = + RegEnable(s2_scratchpad_hit && s2_data_decoded.uncorrectable && !s1s2_full_word_write, s2_slaveValid) + when(s2_slaveValid) { + // need stage 3 if Put or correct decoding. + // @todo if uncorrectable [[s2_data_decoded]]? + when(s2_slaveWriteValid || s2_data_decoded.error) { s3_slaveValid := true.B } + + /** data not masked by the TileLink PutData/PutPartialData. + * means data is stored at [[s1s3_slaveData]] which was read at stage 1. + */ + def byteEn(i: Int) = !axi.w.bits.strb(i) + // write [[s1s3_slaveData]] based on index of wordBits. + // @todo seems a problem here? + // granularity of CPU fetch is `wordBits/8`, + // granularity of TileLink access is `TLBundleParameters.dataBits/8` + // these two granularity can be different. 
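+          // Added commentary: the merge below is a per-byte mux keyed on the AXI
+          // write strobe. E.g. with wordBits = 32 and strb = 0b0011, bytes 0-1
+          // keep the freshly written s1s3_slaveData and bytes 2-3 take the
+          // ECC-corrected read-back:
+          //   byte i := Mux(!strb(i), s2_data_decoded.corrected, s1s3_slaveData)(byte i)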
+ // store data read from RAM + s1s3_slaveData := VecInit((0 until wordBits / 8) + .map(i => Mux(byteEn(i), s2_data_decoded.corrected, s1s3_slaveData)(8 * (i + 1) - 1, 8 * i)) + ).asUInt + } + + axi.r.valid := respValid + // tl.d.bits := Mux( + // edge_in.get.hasData(s1_a), + // // PutData/PutPartialData -> AccessAck + // edge_in.get.AccessAck(s1_a), + // // Get -> AccessAckData + // edge_in.get.AccessAck(s1_a, 0.U, denied = false.B, corrupt = respError) + // ) + axi.r.bits := DontCare + axi.r.bits.data := s1s3_slaveData + axi.r.bits.last := true.B + // Tie off unused channels + axi.b.valid := false.B + + // ccover(s0_valid && s1_slaveValid, "CONCURRENT_ITIM_ACCESS_1", "ITIM accessed, then I$ accessed next cycle") + // ccover( + // s0_valid && s2_slaveValid, + // "CONCURRENT_ITIM_ACCESS_2", + // "ITIM accessed, then I$ accessed two cycles later" + // ) + // ccover(tl.d.valid && !tl.d.ready, "ITIM_D_STALL", "ITIM response blocked by D-channel") + // ccover(tl_out.d.valid && !tl_out.d.ready, "ITIM_BLOCK_D", "D-channel blocked by ITIM access") + } + } + + io.instructionFetchAXI.ar.valid := s2_request_refill + io.instructionFetchAXI.ar.bits := DontCare + io.instructionFetchAXI.ar.bits.id := 0.U + io.instructionFetchAXI.ar.bits.addr := (refill_paddr >> blockOffBits) << blockOffBits + io.instructionFetchAXI.ar.bits.size := log2Up(parameter.blockBytes).U + io.instructionFetchAXI.ar.bits.len := 0.U + io.instructionFetchAXI.ar.bits.burst := 1.U + + // prefetch when next-line access does not cross a page + if (cacheParams.prefetch) { + + /** [[crosses_page]] indicate if there is a crosses page access + * [[next_block]] : the address to be prefetched. + */ + val (crosses_page, next_block) = Split(refill_paddr(pgIdxBits - 1, blockOffBits) +& 1.U, pgIdxBits - blockOffBits) + // AXI Hint via AxCache ? + + // when(tl_out.a.fire) { + // send_hint := !hint_outstanding && io.s2_prefetch && !crosses_page + // when(send_hint) { + // send_hint := false.B + // hint_outstanding := true.B + // } + // } + // + // // @todo why refill_done will kill hint at this cycle? + // when(refill_done) { + // send_hint := false.B + // } + + + // D channel reply with HintAck. 
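+      // Possible AXI replacement for the TileLink Hint above (an assumption, not
+      // the original design): AXI has no Hint/HintAck, so a prefetch would be a
+      // real read of next_block whose data is dropped on return, e.g.
+      //   when(send_hint) {
+      //     io.instructionFetchAXI.ar.valid := true.B
+      //     io.instructionFetchAXI.ar.bits.addr := Cat(refill_paddr >> pgIdxBits, next_block) << blockOffBits
+      //     io.instructionFetchAXI.ar.bits.id := 1.U // tag so R beats can be discarded
+      //   }
+      // This would need arbitration against the demand-miss AR request driven
+      // above, and it consumes real R-channel bandwidth.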
+ // when(tl_out.d.fire && !refill_one_beat) { + // hint_outstanding := false.B + // } + + // when(send_hint) { + // tl_out.a.valid := true.B + // tl_out.a.bits := edge_out + // .Hint( + // fromSource = 1.U, + // toAddress = Cat(refill_paddr >> pgIdxBits, next_block) << blockOffBits, + // lgSize = lgCacheBlockBytes.U, + // param = TLHints.PREFETCH_READ + // ) + // ._2 + // } + + // ccover(send_hint && !tl_out.a.ready, "PREFETCH_A_STALL", "I$ prefetch blocked by A-channel") + // ccover( + // refill_valid && (tl_out.d.fire && !refill_one_beat), + // "PREFETCH_D_BEFORE_MISS_D", + // "I$ prefetch resolves before miss" + // ) + // ccover( + // !refill_valid && (tl_out.d.fire && !refill_one_beat), + // "PREFETCH_D_AFTER_MISS_D", + // "I$ prefetch resolves after miss" + // ) + // ccover(tl_out.a.fire && hint_outstanding, "PREFETCH_D_AFTER_MISS_A", "I$ prefetch resolves after second miss") + } + // Drive APROT information + // bufferable ## modifiable ## readalloc ## writealloc ## privileged ## secure ## fetch + io.instructionFetchAXI.ar.bits.user := true.B ## true.B ## io.s2_cacheable ## io.s2_cacheable ## + true.B ## true.B ## true.B + // tl_out.a.bits.user.lift(AMBAProt).foreach { x => + // // Rocket caches all fetch requests, and it's difficult to differentiate privileged/unprivileged on + // // cached data, so mark as privileged + // x.fetch := true.B + // x.secure := true.B + // x.privileged := true.B + // x.bufferable := true.B + // x.modifiable := true.B + // x.readalloc := io.s2_cacheable + // x.writealloc := io.s2_cacheable + // } + // tl_out.b.ready := true.B + // tl_out.c.valid := false.B + // tl_out.e.valid := false.B + assert(!(io.instructionFetchAXI.ar.valid && addrMaybeInScratchpad(io.instructionFetchAXI.ar.bits.addr))) + + // if there is an outstanding refill, cannot flush I$. + when(!refill_valid) { invalidated := false.B } + when(refill_fire) { refill_valid := true.B } + when(refill_done) { refill_valid := false.B } + + io.perf.acquire := refill_fire + // don't gate I$ clock since there are outstanding transcations. + io.keep_clock_enabled := + io.itimAXI + .map(axi => + axi.ar.valid || axi.aw.valid || axi.w.valid // tl.a.valid + || axi.r.valid //tl.d.valid + || s1_slaveValid || s2_slaveValid || s3_slaveValid) + .getOrElse(false.B) || // ITIM + s1_valid || s2_valid || refill_valid || send_hint || hint_outstanding // I$ + + /** index to access [[data_arrays]] and [[tag_array]]. + * @note + * if [[untagBits]] > [[pgIdxBits]] in + * {{{ + * ┌──idxBits──┐ + * ↓ ↓ + * │ tag │ set │offset│ + * │ pageTag │ pageIndex│ + * ↑ ↑ ↑ │ + * untagBits│ blockOffBits│ + * pgIdxBits │ + * └msb┴──lsb──┘ + * vaddr paddr + * }}} + * + * else use paddr directly. + * Note: if [[untagBits]] > [[pgIdxBits]], there will be a alias issue which isn't addressend by the icache yet. + */ + def index(vaddr: UInt, paddr: UInt) = { + + /** [[paddr]] as LSB to be used for VIPT. */ + val lsbs = paddr(pgUntagBits - 1, blockOffBits) + + /** if [[untagBits]] > [[pgIdxBits]], append [[vaddr]] to higher bits of index as [[msbs]]. 
*/ + val msbs = Option.when(idxBits + blockOffBits > pgUntagBits)(vaddr(idxBits + blockOffBits - 1, pgUntagBits)) + msbs.map(_ ## lsbs).getOrElse(lsbs) + } + + // ccover(!send_hint && (tl_out.a.valid && !tl_out.a.ready), "MISS_A_STALL", "I$ miss blocked by A-channel") + // ccover(invalidate && refill_valid, "FLUSH_DURING_MISS", "I$ flushed during miss") + + // def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = + // property.cover(cond, s"ICACHE_$label", "MemorySystem;;" + desc) + // + // val mem_active_valid = Seq(property.CoverBoolean(s2_valid, Seq("mem_active"))) + // val data_error = Seq( + // property.CoverBoolean(!s2_data_decoded.correctable && !s2_data_decoded.uncorrectable, Seq("no_data_error")), + // property.CoverBoolean(s2_data_decoded.correctable, Seq("data_correctable_error")), + // property.CoverBoolean(s2_data_decoded.uncorrectable, Seq("data_uncorrectable_error")) + // ) + // val request_source = Seq( + // property.CoverBoolean(!s2_slaveValid, Seq("from_CPU")), + // property.CoverBoolean(s2_slaveValid, Seq("from_TL")) + // ) + // val tag_error = Seq( + // property.CoverBoolean(!s2_tag_disparity, Seq("no_tag_error")), + // property.CoverBoolean(s2_tag_disparity, Seq("tag_error")) + // ) + // val mem_mode = Seq( + // property.CoverBoolean(s2_scratchpad_hit, Seq("ITIM_mode")), + // property.CoverBoolean(!s2_scratchpad_hit, Seq("cache_mode")) + // ) + + // val error_cross_covers = new property.CrossProperty( + // Seq(mem_active_valid, data_error, tag_error, request_source, mem_mode), + // Seq( + // // tag error cannot occur in ITIM mode + // Seq("tag_error", "ITIM_mode"), + // // Can only respond to TL in ITIM mode + // Seq("from_TL", "cache_mode") + // ), + // "MemorySystem;;Memory Bit Flip Cross Covers" + // ) + // + // property.cover(error_cross_covers) +} diff --git a/rocketv/src/ImmGen.scala b/rocketv/src/ImmGen.scala new file mode 100644 index 000000000..d78ab20ac --- /dev/null +++ b/rocketv/src/ImmGen.scala @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util.Cat + +object ImmGen { + def IMM_S = 0.U(3.W) + def IMM_SB = 1.U(3.W) + def IMM_U = 2.U(3.W) + def IMM_UJ = 3.U(3.W) + def IMM_I = 4.U(3.W) + def IMM_Z = 5.U(3.W) + + def apply(sel: UInt, inst: UInt) = { + val sign = Mux(sel === IMM_Z, 0.S, inst(31).asSInt) + val b30_20 = Mux(sel === IMM_U, inst(30, 20).asSInt, sign) + val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19, 12).asSInt) + val b11 = Mux( + sel === IMM_U || sel === IMM_Z, + 0.S, + Mux(sel === IMM_UJ, inst(20).asSInt, Mux(sel === IMM_SB, inst(7).asSInt, sign)) + ) + val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, 0.U, inst(30, 25)) + val b4_1 = Mux( + sel === IMM_U, + 0.U, + Mux(sel === IMM_S || sel === IMM_SB, inst(11, 8), Mux(sel === IMM_Z, inst(19, 16), inst(24, 21))) + ) + val b0 = Mux(sel === IMM_S, inst(7), Mux(sel === IMM_I, inst(20), Mux(sel === IMM_Z, inst(15), 0.U))) + + Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).asSInt + } +} diff --git a/rocketv/src/MulDiv.scala b/rocketv/src/MulDiv.scala new file mode 100644 index 000000000..21eb121bf --- /dev/null +++ b/rocketv/src/MulDiv.scala @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// 
SPDX-FileCopyrightText: 2016-2017 SiFive, Inc
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu
+package org.chipsalliance.rocketv
+
+import chisel3._
+import chisel3.experimental.hierarchy.instantiable
+import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
+import chisel3.util.{BitPat, Cat, Decoupled, Enum, Fill, Log2, log2Ceil, log2Floor}
+
+object MulDivParameter {
+  implicit def rwP: upickle.default.ReadWriter[MulDivParameter] = upickle.default.macroRW[MulDivParameter]
+}
+
+case class MulDivParameter(
+  useAsyncReset:          Boolean,
+  latency:                Int,
+  xLen:                   Int,
+  divUnroll:              Int,
+  divEarlyOut:            Boolean,
+  divEarlyOutGranularity: Int,
+  mulUnroll:              Int,
+  mulEarlyOut:            Boolean)
+    extends SerializableModuleParameter {
+  // optional to 16 when rve?
+  val nXpr: Int = 32
+  val uopWidth: Int = 4
+
+  def FN_MUL = 1.U(4.W)
+  def FN_MULH = 2.U(4.W)
+  def FN_MULHU = 3.U(4.W)
+  // assumed encoding: any 4-bit code distinct from the other FN_* values works
+  // (it must not share 4.U with FN_DIV, or DecodeLogic cannot tell them apart),
+  // as long as the issue side uses the same mapping.
+  def FN_MULHSU = 0.U(4.W)
+  def FN_DIV = 4.U(4.W)
+  def FN_REM = 5.U(4.W)
+  def FN_DIVU = 6.U(4.W)
+  def FN_REMU = 7.U(4.W)
+  def DW_32 = false.B
+}
+class MulDivInterface(parameter: MulDivParameter) extends Bundle {
+  val clock = Input(Clock())
+  val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool())
+  val req = Flipped(Decoupled(new MultiplierReq(parameter.xLen, log2Ceil(parameter.nXpr), parameter.uopWidth)))
+  val kill = Input(Bool())
+  val resp = Decoupled(new MultiplierResp(parameter.xLen, log2Ceil(parameter.nXpr)))
+}
+
+@instantiable
+class MulDiv(val parameter: MulDivParameter)
+    extends FixedIORawModule(new MulDivInterface(parameter))
+    with SerializableModule[MulDivParameter]
+    with ImplicitClock
+    with ImplicitReset {
+  override protected def implicitClock: Clock = io.clock
+  override protected def implicitReset: Reset = io.reset
+
+  object cfg {
+    val divUnroll = parameter.divUnroll
+    val divEarlyOut = parameter.divEarlyOut
+    val divEarlyOutGranularity = parameter.divEarlyOutGranularity
+    val mulUnroll = parameter.mulUnroll
+    val mulEarlyOut = parameter.mulEarlyOut
+  }
+
+  def N = BitPat.N()
+  def Y = BitPat.Y()
+  def X = BitPat.dontCare(1)
+
+  val w = io.req.bits.in1.getWidth
+  val mulw = if (cfg.mulUnroll == 0) w else (w + cfg.mulUnroll - 1) / cfg.mulUnroll * cfg.mulUnroll
+  val fastMulW = if (cfg.mulUnroll == 0) false else w / 2 > cfg.mulUnroll && w % (2 * cfg.mulUnroll) == 0
+
+  val s_ready :: s_neg_inputs :: s_mul :: s_div :: s_dummy :: s_neg_output :: s_done_mul :: s_done_div :: Nil = Enum(8)
+  val state = RegInit(s_ready)
+
+  val req = Reg(chiselTypeOf(io.req.bits))
+  val count = Reg(
+    UInt(
+      log2Ceil(
+        (Option.when(cfg.divUnroll != 0)(w / cfg.divUnroll + 1).toSeq ++
+          Option.when(cfg.mulUnroll != 0)(mulw / cfg.mulUnroll)).reduce(_ max _)
+      ).W
+    )
+  )
+  val neg_out = Reg(Bool())
+  val isHi = Reg(Bool())
+  val resHi = Reg(Bool())
+  val divisor = Reg(UInt((w + 1).W)) // div only needs w bits
+  val remainder = Reg(UInt((2 * mulw + 2).W)) // div only needs 2*w+1 bits
+
+  val mulDecode = List(
+    parameter.FN_MUL -> List(Y, N, X, X),
+    parameter.FN_MULH -> List(Y, Y, Y, Y),
+    parameter.FN_MULHU -> List(Y, Y, N, N),
+    parameter.FN_MULHSU -> List(Y, Y, Y, N)
+  )
+  val divDecode = List(
+    parameter.FN_DIV -> List(N, N, Y, Y),
+    parameter.FN_REM -> List(N, Y, Y, Y),
+    parameter.FN_DIVU -> List(N, N, N, N),
+    parameter.FN_REMU -> List(N, Y, N, N)
+  )
+  // TODO: move these decoding to Decoder.
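+  // Table key (added commentary): each row decodes fn to
+  // List(cmdMul, cmdHi, lhsSigned, rhsSigned). For example
+  //   FN_MULH -> List(Y, Y, Y, Y): multiply, return high half, signed * signed;
+  //   FN_DIVU -> List(N, N, N, N): divide, return quotient, unsigned operands;
+  //   FN_REM  -> List(N, Y, Y, Y): divide, return remainder (cmdHi = Y), signed.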
+ val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil = + DecodeLogic( + io.req.bits.fn, + List(X, X, X, X), + (if (cfg.divUnroll != 0) divDecode else Nil) ++ (if (cfg.mulUnroll != 0) mulDecode else Nil) + ).map(_.asBool) + + require(w == 32 || w == 64) + def halfWidth(req: MultiplierReq) = (w > 32).B && req.dw === parameter.DW_32 + + def sext(x: Bits, halfW: Bool, signed: Bool) = { + val sign = signed && Mux(halfW, x(w / 2 - 1), x(w - 1)) + val hi = Mux(halfW, Fill(w / 2, sign), x(w - 1, w / 2)) + (Cat(hi, x(w / 2 - 1, 0)), sign) + } + val (lhs_in, lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned) + + val subtractor = remainder(2 * w, w) - divisor + val result = Mux(resHi, remainder(2 * w, w + 1), remainder(w - 1, 0)) + val negated_remainder = -result + + if (cfg.divUnroll != 0) when(state === s_neg_inputs) { + when(remainder(w - 1)) { + remainder := negated_remainder + } + when(divisor(w - 1)) { + divisor := subtractor + } + state := s_div + } + if (cfg.divUnroll != 0) when(state === s_neg_output) { + remainder := negated_remainder + state := s_done_div + resHi := false.B + } + if (cfg.mulUnroll != 0) when(state === s_mul) { + val mulReg = Cat(remainder(2 * mulw + 1, w + 1), remainder(w - 1, 0)) + val mplierSign = remainder(w) + val mplier = mulReg(mulw - 1, 0) + val accum = mulReg(2 * mulw, mulw).asSInt + val mpcand = divisor.asSInt + val prod = Cat(mplierSign, mplier(cfg.mulUnroll - 1, 0)).asSInt * mpcand + accum + val nextMulReg = Cat(prod, mplier(mulw - 1, cfg.mulUnroll)) + val nextMplierSign = count === (mulw / cfg.mulUnroll - 2).U && neg_out + + val eOutMask = ((BigInt(-1) << mulw).S >> (count * cfg.mulUnroll.U)(log2Ceil(mulw) - 1, 0))(mulw - 1, 0) + val eOut = (cfg.mulEarlyOut).B && count =/= (mulw / cfg.mulUnroll - 1).U && count =/= 0.U && + !isHi && (mplier & ~eOutMask) === 0.U + val eOutRes = (mulReg >> (mulw.U - count * cfg.mulUnroll.U)(log2Ceil(mulw) - 1, 0)) + val nextMulReg1 = Cat(nextMulReg(2 * mulw, mulw), Mux(eOut, eOutRes, nextMulReg)(mulw - 1, 0)) + remainder := Cat(nextMulReg1 >> w, nextMplierSign, nextMulReg1(w - 1, 0)) + + count := count + 1.U + when(eOut || count === (mulw / cfg.mulUnroll - 1).U) { + state := s_done_mul + resHi := isHi + } + } + if (cfg.divUnroll != 0) when(state === s_div) { + val unrolls = ((0 until cfg.divUnroll) + .scanLeft(remainder)) { + case (rem, i) => + // the special case for iteration 0 is to save HW, not for correctness + val difference = if (i == 0) subtractor else rem(2 * w, w) - divisor(w - 1, 0) + val less = difference(w) + Cat(Mux(less, rem(2 * w - 1, w), difference(w - 1, 0)), rem(w - 1, 0), !less) + } + .tail + + remainder := unrolls.last + when(count === (w / cfg.divUnroll).U) { + state := Mux(neg_out, s_neg_output, s_done_div) + resHi := isHi + if (w % cfg.divUnroll < cfg.divUnroll - 1) + remainder := unrolls(w % cfg.divUnroll) + } + count := count + 1.U + + val divby0 = count === 0.U && !subtractor(w) + if (cfg.divEarlyOut) { + val align = 1 << log2Floor(cfg.divUnroll.max(cfg.divEarlyOutGranularity)) + val alignMask = ~((align - 1).U(log2Ceil(w).W)) + val divisorMSB = Log2(divisor(w - 1, 0), w) & alignMask + val dividendMSB = Log2(remainder(w - 1, 0), w) | ~alignMask + val eOutPos = ~(dividendMSB - divisorMSB) + val eOut = count === 0.U && !divby0 && eOutPos >= align.U + when(eOut) { + remainder := remainder(w - 1, 0) << eOutPos + count := eOutPos >> log2Floor(cfg.divUnroll) + } + } + when(divby0 && !isHi) { neg_out := 
false.B }
+  }
+  when(io.resp.fire || io.kill) {
+    state := s_ready
+  }
+  when(io.req.fire) {
+    state := Mux(cmdMul, s_mul, Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div))
+    isHi := cmdHi
+    resHi := false.B
+    count := (if (fastMulW) Mux[UInt](cmdMul && halfWidth(io.req.bits), (w / cfg.mulUnroll / 2).U, 0.U) else 0.U)
+    neg_out := Mux(cmdHi, lhs_sign, lhs_sign =/= rhs_sign)
+    divisor := Cat(rhs_sign, rhs_in)
+    remainder := lhs_in
+    req := io.req.bits
+  }
+
+  val outMul = (state & (s_done_mul ^ s_done_div)) === (s_done_mul & ~s_done_div)
+  val loOut = Mux(fastMulW.B && halfWidth(req) && outMul, result(w - 1, w / 2), result(w / 2 - 1, 0))
+  val hiOut = Mux(halfWidth(req), Fill(w / 2, loOut(w / 2 - 1)), result(w - 1, w / 2))
+  io.resp.bits.tag := req.tag
+
+  io.resp.bits.data := Cat(hiOut, loOut)
+  io.resp.bits.full_data := Cat(remainder(2 * w, w + 1), remainder(w - 1, 0))
+  io.resp.valid := (state === s_done_mul || state === s_done_div)
+  io.req.ready := state === s_ready
+}
diff --git a/rocketv/src/PMA.scala b/rocketv/src/PMA.scala
new file mode 100644
index 000000000..7c3ae51f9
--- /dev/null
+++ b/rocketv/src/PMA.scala
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California
+// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu
+package org.chipsalliance.rocketv
+
+import chisel3._
+import chisel3.experimental.hierarchy.instantiable
+import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
+import chisel3.util.experimental.BitSet
+
+object PMACheckerParameter {
+  implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default
+    .readwriter[String]
+    .bimap[BitSet](
+      bs => bs.terms.map("b" + _.rawString).mkString("\n"),
+      str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str)
+    )
+  implicit def rwP: upickle.default.ReadWriter[PMACheckerParameter] = upickle.default.macroRW[PMACheckerParameter]
+}
+
+case class PMACheckerParameter(
+  paddrBits: Int,
+  legal: BitSet,
+  cacheable: BitSet,
+  read: BitSet,
+  write: BitSet,
+  putPartial: BitSet,
+  logic: BitSet,
+  arithmetic: BitSet,
+  exec: BitSet,
+  sideEffects: BitSet)
+  extends SerializableModuleParameter
+
+class PMACheckerInterface(parameter: PMACheckerParameter) extends Bundle {
+  val paddr = Input(UInt(parameter.paddrBits.W))
+  val resp = Output(new PMACheckerResponse)
+}
+
+@instantiable
+class PMAChecker(val parameter: PMACheckerParameter)
+  extends FixedIORawModule(new PMACheckerInterface(parameter))
+    with SerializableModule[PMACheckerParameter] {
+  // Check whether any slave exists that can consume this address.
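+  // A hypothetical configuration sketch (addresses made up for illustration, not taken
+  // from a real SoC): one 256 MiB cacheable window at 0x8000_0000 that permits every
+  // access type, expressed with the same BitSet patterns serialized above.
+  //
+  //   val dram = BitSet.fromString("b1000????????????????????????????") // paddrBits = 32
+  //   PMACheckerParameter(paddrBits = 32, legal = dram, cacheable = dram,
+  //     read = dram, write = dram, putPartial = dram, logic = dram,
+  //     arithmetic = dram, exec = dram, sideEffects = BitSet.empty)
+  //
+  // Each response bit below is the legality check ANDed with the matching attribute set.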
+ val legal_address = parameter.legal.matches(io.paddr) + io.resp.cacheable := legal_address && (if(parameter.cacheable.isEmpty) false.B else parameter.cacheable.matches(io.paddr)) + io.resp.r := legal_address && (if(parameter.read.isEmpty) false.B else parameter.read.matches(io.paddr)) + io.resp.w := legal_address && (if(parameter.write.isEmpty) false.B else parameter.write.matches(io.paddr)) + io.resp.pp := legal_address && (if(parameter.putPartial.isEmpty) false.B else parameter.putPartial.matches(io.paddr)) + io.resp.al := legal_address && (if(parameter.logic.isEmpty) false.B else parameter.logic.matches(io.paddr)) + io.resp.aa := legal_address && (if(parameter.arithmetic.isEmpty) false.B else parameter.arithmetic.matches(io.paddr)) + io.resp.x := legal_address && (if(parameter.exec.isEmpty) false.B else parameter.exec.matches(io.paddr)) + io.resp.eff := legal_address && (if(parameter.sideEffects.isEmpty) false.B else parameter.sideEffects.matches(io.paddr)) +} diff --git a/rocketv/src/PMP.scala b/rocketv/src/PMP.scala new file mode 100644 index 000000000..be454eae4 --- /dev/null +++ b/rocketv/src/PMP.scala @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{instantiable, public} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.log2Ceil + +object PMPCheckerParameter { + implicit def rwP: upickle.default.ReadWriter[PMPCheckerParameter] = upickle.default.macroRW[PMPCheckerParameter] +} + +case class PMPCheckerParameter( + nPMPs: Int, + paddrBits: Int, + // @todo: log2Ceil(coreDataBytes)? 
+ lgMaxSize: Int, + pmpGranularity: Int) + extends SerializableModuleParameter + +class PMPCheckerInterface(parameter: PMPCheckerParameter) extends Bundle { + val prv = Input(UInt(PRV.SZ.W)) + val pmp = Input(Vec(parameter.nPMPs, new PMP(parameter.paddrBits))) + val addr = Input(UInt(parameter.paddrBits.W)) + val size = Input(UInt(log2Ceil(parameter.lgMaxSize + 1).W)) + val r = Output(Bool()) + val w = Output(Bool()) + val x = Output(Bool()) +} + +@instantiable +class PMPChecker(val parameter: PMPCheckerParameter) + extends FixedIORawModule(new PMPCheckerInterface(parameter)) + with SerializableModule[PMPCheckerParameter] { + + val paddrBits = parameter.paddrBits + val pmpGranularity = parameter.pmpGranularity + val lgMaxSize = parameter.lgMaxSize + + val default = if (io.pmp.isEmpty) true.B else io.prv > PRV.S.U + val pmp0 = WireInit(0.U.asTypeOf(new PMP(paddrBits))) + pmp0.cfg.r := default + pmp0.cfg.w := default + pmp0.cfg.x := default + + val res = io.pmp.zip(pmp0 +: io.pmp).reverse.foldLeft(pmp0) { + case (prev, (pmp, prevPMP)) => + val hit = PMP.hit(pmp, io.addr, io.size, lgMaxSize, prevPMP, pmpGranularity) + val ignore = default && !pmp.cfg.l + val aligned = PMP.aligned(pmp, io.addr, io.size, lgMaxSize, prevPMP, pmpGranularity) + val cur = WireInit(pmp) + cur.cfg.r := aligned && (pmp.cfg.r || ignore) + cur.cfg.w := aligned && (pmp.cfg.w || ignore) + cur.cfg.x := aligned && (pmp.cfg.x || ignore) + Mux(hit, cur, prev) + } + + io.r := res.cfg.r + io.w := res.cfg.w + io.x := res.cfg.x +} diff --git a/rocketv/src/PTW.scala b/rocketv/src/PTW.scala new file mode 100644 index 000000000..745e4aa66 --- /dev/null +++ b/rocketv/src/PTW.scala @@ -0,0 +1,914 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.circt.ClockGate +import chisel3.util.{Arbiter, Cat, Enum, Mux1H, OHToUInt, PopCount, PriorityEncoder, PriorityEncoderOH, RegEnable, SRAM, SRAMInterface, UIntToOH, Valid, is, isPow2, log2Ceil, switch} + +object PTWParameter { + implicit def rwP: upickle.default.ReadWriter[PTWParameter] = upickle.default.macroRW[PTWParameter] +} + +case class PTWParameter(useAsyncReset: Boolean, + hasClockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + xLen: Int, + fLen: Int, + paddrBits: Int, + asidBits: Int, + pgLevels: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + nPMPs: Int, + ) extends SerializableModuleParameter { + def pmpGranularity: Int = if (usingHypervisor) 4096 else 4 + // TODO: configurable + def cacheBlockBytes = 64 + def lgCacheBlockBytes = log2Ceil(cacheBlockBytes) + def blockOffBits = lgCacheBlockBytes + def nL2TLBSets = nL2TLBEntries / nL2TLBWays + def idxBits: Int = if (nL2TLBSets == 0) 0 else log2Ceil(nL2TLBSets) + def untagBits: Int = blockOffBits + idxBits + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + def maxHypervisorExtraAddrBits: Int = 2 + def pgIdxBits: Int = 12 + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def dcacheReqTagBits: Int = 6 + def separateUncachedResp: Boolean = false + + // @todo make it true in the future. 
+  def usingDTIM: Boolean = false
+  def dcacheArbPorts: Int = 1 + (if (usingVM) 1 else 0) + (if (usingDTIM) 1 else 0)
+  def coreDataBytes: Int = (xLen max fLen) / 8
+  def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + (if (usingHypervisor) 1 else 0) else 0)
+  def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits
+  def vpnBits: Int = vaddrBits - pgIdxBits
+  def ppnBits: Int = paddrBits - pgIdxBits
+  def hypervisorExtraAddrBits: Int = {
+    if (usingHypervisor) maxHypervisorExtraAddrBits
+    else 0
+  }
+  def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits
+  def vaddrBits: Int =
+    if (usingVM) {
+      val v = maxHVAddrBits
+      require(v == xLen || xLen > v && v > paddrBits)
+      v
+    } else {
+      // since virtual addresses sign-extend but physical addresses
+      // zero-extend, make room for a zero sign bit for physical addresses
+      (paddrBits + 1) min xLen
+    }
+  def pgLevelBits: Int = 10 - log2Ceil(xLen / 32)
+  def minPgLevels: Int = {
+    val res = xLen match { case 32 => 2; case 64 => 3 }
+    require(pgLevels >= res)
+    res
+  }
+  def maxPAddrBits: Int = xLen match {
+    case 32 => 34
+    case 64 => 56
+  }
+  // I$ + D$
+  def nPTWPorts: Int = 2
+}
+
+class PTWInterface(parameter: PTWParameter) extends Bundle {
+  val clock = Input(Clock())
+  val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool())
+
+  /** to the n TLBs */
+  val requestor = Flipped(
+    Vec(
+      parameter.nPTWPorts,
+      new TLBPTWIO(
+        parameter.nPMPs,
+        parameter.vpnBits,
+        parameter.paddrBits,
+        parameter.vaddrBits,
+        parameter.pgLevels,
+        parameter.xLen,
+        parameter.maxPAddrBits,
+        parameter.pgIdxBits
+      )
+    )
+  )
+
+  /** to HellaCache */
+  val mem = new HellaCacheIO(
+    parameter.coreMaxAddrBits,
+    parameter.usingVM,
+    parameter.untagBits,
+    parameter.pgIdxBits,
+    parameter.dcacheReqTagBits,
+    parameter.dcacheArbPorts,
+    parameter.coreDataBytes,
+    parameter.paddrBits,
+    parameter.vaddrBitsExtended,
+    parameter.separateUncachedResp
+  )
+
+  /** to Core
+    *
+    * contains CSR info and performance statistics
+    */
+  val dpath = new DatapathPTWIO(
+    parameter.xLen,
+    parameter.maxPAddrBits,
+    parameter.pgIdxBits,
+    parameter.vaddrBits,
+    parameter.asidBits,
+    parameter.nPMPs,
+    parameter.paddrBits
+  )
+}
+
+/** PTW contains the L2TLB, performs page table walks on behalf of the higher-level TLBs, and answers queries from the L1 TLBs (I$, D$, RoCC).
+  *
+  * It performs a hierarchical page-table query to mem for the desired leaf PTE and caches it in the l2tlb.
+  * Besides leaf PTEs, it also caches non-leaf PTEs in pte_cache to accelerate the walk.
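+  * For example, with Sv39 (pgLevels = 3) a TLB miss costs up to three sequential
+  * memory reads; a pte_cache hit on an intermediate level skips that level's read.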
+  *
+  * ==Structure==
+  *  - l2tlb: for leaf PTEs
+  *    - set-associative (configurable with [[CoreParams.nL2TLBEntries]] and [[CoreParams.nL2TLBWays]])
+  *    - PLRU
+  *  - pte_cache: for non-leaf PTEs
+  *    - set-associative
+  *    - LRU
+  *  - s2_pte_cache: for non-leaf PTEs in 2-stage translation
+  *    - set-associative
+  *    - PLRU
+  *
+  * l2tlb pipeline: 3 stages
+  * {{{
+  * stage 0 : read
+  * stage 1 : decode
+  * stage 2 : hit check
+  * }}}
+  * ==State Machine==
+  * s_ready: ready to receive requests from the TLBs
+  * s_req: request mem; judge pte_cache hit
+  * s_wait1: deal with l2tlb error
+  * s_wait2: final hit judgement
+  * s_wait3: receive mem response
+  * s_fragment_superpage: for superpage PTEs
+  *
+  * @note l2tlb hit happens in s_req or s_wait1
+  * @see RV-priv spec 4.3-4.6 for the Virtual-Memory System
+  * @see RV-priv spec 8.5 for Two-Stage Address Translation
+  * @todo details in two-stage translation
+  */
+@instantiable
+class PTW(val parameter: PTWParameter)
+  extends FixedIORawModule(new PTWInterface(parameter))
+    with SerializableModule[PTWParameter]
+    with ImplicitClock
+    with ImplicitReset {
+  override protected def implicitClock: Clock = io.clock
+  override protected def implicitReset: Reset = io.reset
+
+  val vpnBits: Int = parameter.vpnBits
+  val ppnBits: Int = parameter.ppnBits
+  val vaddrBits: Int = parameter.vaddrBits
+  val paddrBits: Int = parameter.paddrBits
+  val n: Int = parameter.nPTWPorts
+  val pgLevels: Int = parameter.pgLevels
+  val pgLevelBits: Int = parameter.pgLevelBits
+  val minPgLevels: Int = parameter.minPgLevels
+  val hypervisorExtraAddrBits: Int = parameter.hypervisorExtraAddrBits
+  val usingHypervisor: Boolean = parameter.usingHypervisor
+  val xLen: Int = parameter.xLen
+  val maxPAddrBits: Int = parameter.maxPAddrBits
+  val pgIdxBits: Int = parameter.pgIdxBits
+  val maxSVAddrBits: Int = parameter.maxSVAddrBits
+  val pmpGranularity: Int = parameter.pmpGranularity
+  val usingVM: Boolean = parameter.usingVM
+  val hasClockGate: Boolean = parameter.hasClockGate
+  val maxHypervisorExtraAddrBits: Int = parameter.maxHypervisorExtraAddrBits
+  def M_XRD = "b00000".U
+
+  object coreParams {
+    val nPTECacheEntries: Int = parameter.nPTECacheEntries
+    val nL2TLBWays: Int = parameter.nL2TLBWays
+    val nL2TLBEntries: Int = parameter.nL2TLBEntries
+  }
+
+  def OptimizationBarrier[T <: Data](in: T): T = {
+    val barrier = Module(new Module {
+      val io = IO(new Bundle {
+        val x = Input(chiselTypeOf(in))
+        val y = Output(chiselTypeOf(in))
+      })
+      io.y := io.x
+      override def desiredName = "OptimizationBarrier"
+    })
+    barrier.io.x := in
+    barrier.io.y
+  }
+
+  def additionalPgLevels(ptbr: PTBR) = ptbr.mode(log2Ceil(pgLevels - minPgLevels + 1) - 1, 0)
+
+  def padTo(x: UInt, n: Int): UInt = {
+    require(x.getWidth <= n)
+    if (x.getWidth == n) x
+    else Cat(0.U((n - x.getWidth).W), x)
+  }
+
+  class PMPHomogeneityChecker(pmps: Seq[PMP]) {
+    def apply(addr: UInt, pgLevel: UInt): Bool = {
+      pmps
+        .foldLeft((true.B, 0.U.asTypeOf(chiselTypeOf(pmps.head)))) {
+          case ((h, prev), pmp) =>
+            (h && PMP.homogeneous(pmp, addr, pgLevel, prev, paddrBits, pmpGranularity, pgLevels, pgIdxBits, pgLevelBits), pmp)
+        }
+        ._1
+    }
+  }
+
+  // compatibility mode
+  object Split {
+    def apply(x: UInt, n0: Int) = {
+      val w = x.getWidth
+      (x(w - 1, n0), x(n0 - 1, 0))
+    }
+    def apply(x: UInt, n1: Int, n0: Int) = {
+      val w = x.getWidth
+      (x(w - 1, n1), x(n1 - 1, n0), x(n0 - 1, 0))
+    }
+    def apply(x: UInt, n2: Int, n1: Int, n0: Int) = {
+      val w = x.getWidth
+      (x(w - 1, n2), x(n2 - 1, n1), x(n1 - 1, n0), x(n0 - 1, 0))
+    }
+  }
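+  // For instance, Split(vaddr, pgIdxBits) separates an address into its (vpn, pageOffset)
+  // pair; the multi-argument overloads slice off further fields in one call.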
+  val s_ready :: s_req :: s_wait1 :: s_dummy1 :: s_wait2 :: s_wait3 :: s_dummy2 :: s_fragment_superpage :: Nil = Enum(8)
+  val state = RegInit(s_ready)
+  val l2_refill_wire = Wire(Bool())
+
+  /** Arbiter to arbitrate requests from the n TLBs */
+  val arb = Module(new Arbiter(Valid(new PTWReq(vpnBits)), n))
+  // use the TLB requests as the arbiter's inputs
+  arb.io.in <> io.requestor.map(_.req)
+  // accept a request only in s_ready and not during a refill
+  arb.io.out.ready := (state === s_ready) && !l2_refill_wire
+
+  val resp_valid = RegNext(VecInit(Seq.fill(io.requestor.size)(false.B)))
+
+  // val clock_en =
+  //   state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate
+  val clock_en = state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid
+  io.dpath.clock_enabled := usingVM.B && clock_en
+  val gated_clock: Clock =
+    if (!usingVM || !hasClockGate) io.clock
+    else ClockGate(io.clock, clock_en)
+  withClock(gated_clock) { // entering gated-clock domain
+
+    val invalidated = Reg(Bool())
+
+    /** current PTE level
+      * {{{
+      * 0 <= count <= pgLevel - 1
+      * count = pgLevel - 1 : leaf PTE
+      * count < pgLevel - 1 : non-leaf PTE
+      * }}}
+      */
+    val count = Reg(UInt(log2Ceil(pgLevels).W))
+    val resp_ae_ptw = Reg(Bool())
+    val resp_ae_final = Reg(Bool())
+    val resp_pf = Reg(Bool())
+    val resp_gf = Reg(Bool())
+    val resp_hr = Reg(Bool())
+    val resp_hw = Reg(Bool())
+    val resp_hx = Reg(Bool())
+    val resp_fragmented_superpage = Reg(Bool())
+
+    /** tlb request */
+    val r_req = Reg(new PTWReq(vpnBits))
+
+    /** currently selected way in the arbiter */
+    val r_req_dest = Reg(Bits())
+    // to respond to L1TLB : l2_hit
+    // to construct mem.req.addr
+    val r_pte = Reg(new PTE)
+    val r_hgatp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits))
+    // 2-stage pageLevel
+    val aux_count = Reg(UInt(log2Ceil(pgLevels).W))
+
+    /** pte for 2-stage translation */
+    val aux_pte = Reg(new PTE)
+    val aux_ppn_hi = Option.when(pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth)(
+      Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W))
+    )
+    val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only valid in resp_gf case
+    val stage2 = Reg(Bool())
+    val stage2_final = Reg(Bool())
+
+    val satp = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp, io.dpath.ptbr)
+
+    val r_hgatp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(r_hgatp)
+
+    /** both stages of 2-stage translation are enabled */
+    val do_both_stages = r_req.vstage1 && r_req.stage2
+    val max_count = count.max(aux_count)
+    val vpn = Mux(r_req.vstage1 && stage2, aux_pte.ppn, r_req.addr)
+
+    val mem_resp_valid = RegNext(io.mem.resp.valid)
+    val mem_resp_data = RegNext(io.mem.resp.bits.data)
+    io.mem.uncached_resp.map { resp =>
+      assert(!(resp.valid && io.mem.resp.valid))
+      resp.ready := true.B
+      when(resp.valid) {
+        mem_resp_valid := true.B
+        mem_resp_data := resp.bits.data
+      }
+    }
+    // construct the PTE from mem.resp
+    val (pte: PTE, invalid_paddr: Bool) = {
+      val tmp = mem_resp_data.asTypeOf(new PTE())
+      val res = WireDefault(tmp)
+      res.ppn := Mux(do_both_stages && !stage2, tmp.ppn(vpnBits.min(tmp.ppn.getWidth) - 1, 0), tmp.ppn(ppnBits - 1, 0))
+      when(tmp.r || tmp.w || tmp.x) {
+        // for superpage mappings, make sure PPN LSBs are zero
+        for (i <- 0 until pgLevels - 1)
+          when(
+            count <= i.U && tmp.ppn((pgLevels - 1 - i) * pgLevelBits - 1, (pgLevels - 2 - i) * pgLevelBits) =/= 0.U
+          ) { res.v := false.B }
+      }
+      (res, Mux(do_both_stages && !stage2, (tmp.ppn >> vpnBits) =/= 0.U, (tmp.ppn >> ppnBits) =/= 0.U))
+    }
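+    // Per the RISC-V privileged spec, a valid PTE with R = W = X = 0 is a pointer to the
+    // next level of the page table; any of R/W/X set marks a leaf. table() below
+    // additionally requires D/A/U and the reserved-for-future bits to be clear.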
+    // a non-leaf PTE means the walk must traverse to the next level
+    def table(pte: PTE) =
+      pte.v && !pte.r && !pte.w && !pte.x && !pte.d && !pte.a && !pte.u && pte.reserved_for_future === 0.U
+    val traverse = table(pte) && !invalid_paddr && count < (pgLevels - 1).U
+
+    /** address sent to mem for the page-table read */
+    val pte_addr =
+      if (!usingVM) 0.U
+      else {
+        val vpn_idxs = VecInit((0 until pgLevels).map { i =>
+          val width = pgLevelBits + (if (i <= pgLevels - minPgLevels) hypervisorExtraAddrBits else 0)
+          (vpn >> (pgLevels - i - 1) * pgLevelBits)(width - 1, 0)
+        })
+        val mask = Mux(
+          stage2 && count === r_hgatp_initial_count,
+          ((1 << (hypervisorExtraAddrBits + pgLevelBits)) - 1).U,
+          ((1 << pgLevelBits) - 1).U
+        )
+        val vpn_idx = vpn_idxs(count) & mask
+        val raw_pte_addr: UInt = ((r_pte.ppn << pgLevelBits) | vpn_idx) << log2Ceil(xLen / 8)
+        val size = if (usingHypervisor) vaddrBits else paddrBits
+        // use r_pte.ppn as the page table base address
+        // and the vpn slice as the offset
+        raw_pte_addr.apply(size.min(raw_pte_addr.getWidth) - 1, 0)
+      }
+
+    /** pte_cache input addr */
+    val pte_cache_addr =
+      if (!usingHypervisor) pte_addr
+      else {
+        val vpn_idxs = VecInit((0 until pgLevels - 1).map { i =>
+          val ext_aux_pte_ppn = aux_ppn_hi match {
+            case None     => aux_pte.ppn
+            case Some(hi) => Cat(hi, aux_pte.ppn)
+          }
+          (ext_aux_pte_ppn >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0)
+        })
+        val vpn_idx = vpn_idxs(count)
+        val raw_pte_cache_addr = Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen / 8)
+        raw_pte_cache_addr(vaddrBits.min(raw_pte_cache_addr.getWidth) - 1, 0)
+      }
+
+    /** stage2_pte_cache input addr */
+    val stage2_pte_cache_addr =
+      if (!usingHypervisor) 0.U
+      else {
+        val vpn_idxs = VecInit((0 until pgLevels - 1).map { i =>
+          (r_req.addr >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0)
+        })
+        val vpn_idx = vpn_idxs(aux_count)
+        val raw_s2_pte_cache_addr = Cat(aux_pte.ppn, vpn_idx) << log2Ceil(xLen / 8)
+        raw_s2_pte_cache_addr(vaddrBits.min(raw_s2_pte_cache_addr.getWidth) - 1, 0)
+      }
+
+    def makeFragmentedSuperpagePPN(ppn: UInt): Seq[UInt] = {
+      def padTo(x: UInt, n: Int): UInt = {
+        require(x.getWidth <= n)
+        if (x.getWidth == n) x
+        else Cat(0.U((n - x.getWidth).W), x)
+      }
+      (pgLevels - 1 until 0 by -1).map(i =>
+        Cat(ppn >> (pgLevelBits * i), padTo(r_req.addr(((pgLevelBits * i).min(vpnBits)) - 1, 0), (pgLevelBits * i)))
+      )
+    }
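+    // For example, under Sv39 a level-0 (1 GiB) leaf fragmented at 4 KiB granularity
+    // keeps the upper PPN bits and takes the low 18 PPN bits from the request's VPN;
+    // this is what s_fragment_superpage uses to answer one base page at a time.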
+    /** PTECache caches non-leaf PTEs
+      * @param s2 true: 2-stage address translation
+      */
+    def makePTECache(s2: Boolean): (Bool, UInt) = if (coreParams.nPTECacheEntries == 0) {
+      (false.B, 0.U)
+    } else {
+      val plru = new PseudoLRU(coreParams.nPTECacheEntries)
+      val valid = RegInit(0.U(coreParams.nPTECacheEntries.W))
+      val tags = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor) 1 + vaddrBits else paddrBits).W)))
+      // holds only the PPN, not the full PTE
+      val data = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor && s2) vpnBits else ppnBits).W)))
+      val can_hit =
+        if (s2)
+          count === r_hgatp_initial_count && aux_count < (pgLevels - 1).U && r_req.vstage1 && stage2 && !stage2_final
+        else count < (pgLevels - 1).U && Mux(r_req.vstage1, stage2, !r_req.stage2)
+      val can_refill =
+        if (s2) do_both_stages && !stage2 && !stage2_final
+        else can_hit
+
+      val tag =
+        if (s2) Cat(true.B, padTo(stage2_pte_cache_addr, vaddrBits))
+        else Cat(r_req.vstage1, padTo(pte_cache_addr, if (usingHypervisor) vaddrBits else paddrBits))
+
+      val hits = VecInit(tags.map(_ === tag)).asUInt & valid
+      val hit = hits.orR && can_hit
+      // refill with the mem response
+      when(mem_resp_valid && traverse && can_refill && !hits.orR && !invalidated) {
+        val r = Mux(valid.andR, plru.way, PriorityEncoder(~valid))
+        valid := valid | UIntToOH(r)
+        tags(r) := tag
+        data(r) := pte.ppn
+        plru.access(r)
+      }
+      // update the replacement state on a hit
+      when(hit && state === s_req) { plru.access(OHToUInt(hits)) }
+      when(io.dpath.sfence.valid && (!io.dpath.sfence.bits.rs1 || usingHypervisor.B && io.dpath.sfence.bits.hg)) {
+        valid := 0.U
+      }
+
+      val lcount = if (s2) aux_count else count
+      // for (i <- 0 until pgLevels - 1) {
+      //   ccover(hit && state === s_req && lcount === i.U, s"PTE_CACHE_HIT_L$i", s"PTE cache hit, level $i")
+      // }
+
+      (hit, Mux1H(hits, data))
+    }
+    // generate pte_cache
+    val (pte_cache_hit, pte_cache_data) = makePTECache(false)
+    // generate pte_cache with 2-stage translation
+    val (stage2_pte_cache_hit, stage2_pte_cache_data) = makePTECache(true)
+    // pte_cache hit or 2-stage pte_cache hit
+    val pte_hit = RegNext(false.B)
+    io.dpath.perf.pte_miss := false.B
+    io.dpath.perf.pte_hit := pte_hit && (state === s_req) && !io.dpath.perf.l2hit
+    assert(
+      !(io.dpath.perf.l2hit && (io.dpath.perf.pte_miss || io.dpath.perf.pte_hit)),
+      "PTE Cache Hit/Miss Performance Monitor Events are lower priority than L2TLB Hit event"
+    )
+    // l2_refill happens when the leaf PTE is found
+    val l2_refill = RegNext(false.B)
+    l2_refill_wire := l2_refill
+    io.dpath.perf.l2miss := false.B
+    io.dpath.perf.l2hit := false.B
+    // l2tlb
+    val (l2_hit, l2_error, l2_pte, l2_tlb_ram) =
+      if (coreParams.nL2TLBEntries == 0) (false.B, false.B, WireDefault(0.U.asTypeOf(new PTE)), None)
+      else {
+        val code = new ParityCode
+        require(isPow2(coreParams.nL2TLBEntries))
+        require(isPow2(coreParams.nL2TLBWays))
+        require(coreParams.nL2TLBEntries >= coreParams.nL2TLBWays)
+        val nL2TLBSets = coreParams.nL2TLBEntries / coreParams.nL2TLBWays
+        require(isPow2(nL2TLBSets))
+        val idxBits = log2Ceil(nL2TLBSets)
+
+        val l2_plru = new SetAssocLRU(nL2TLBSets, coreParams.nL2TLBWays, "plru")
+        val ram: SRAMInterface[Vec[UInt]] = SRAM.masked(
+          size = nL2TLBSets,
+          tpe = Vec(coreParams.nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets, ppnBits, maxSVAddrBits, pgIdxBits, usingHypervisor).getWidth).W)),
+          numReadPorts = 0,
+          numWritePorts = 0,
+          numReadwritePorts = 1
+        )
+
+        val g = Reg(Vec(coreParams.nL2TLBWays, UInt(nL2TLBSets.W)))
+        val valid = RegInit(VecInit(Seq.fill(coreParams.nL2TLBWays)(0.U(nL2TLBSets.W))))
+        // use r_req to construct the tag
+        val (r_tag, r_idx) = Split(Cat(r_req.vstage1, r_req.addr(maxSVAddrBits - pgIdxBits - 1, 0)), idxBits)
+
+        /** the valid vec for the selected set (including n ways) */
+        val r_valid_vec = VecInit(valid.map(_(r_idx))).asUInt
+        val r_valid_vec_q = Reg(UInt(coreParams.nL2TLBWays.W))
+        val r_l2_plru_way = Reg(UInt(log2Ceil(coreParams.nL2TLBWays.max(1)).W))
+        r_valid_vec_q := r_valid_vec
+        // replacement way
+        r_l2_plru_way := (if (coreParams.nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U)
+        // refill with r_pte (the leaf PTE)
+        val entry = Wire(new L2TLBEntry(nL2TLBSets, ppnBits, maxSVAddrBits, pgIdxBits, usingHypervisor))
+        entry.ppn := r_pte.ppn
+        entry.d := r_pte.d
+        entry.a := r_pte.a
+        entry.u := r_pte.u
+        entry.x := r_pte.x
+        entry.w := r_pte.w
+        entry.r := r_pte.r
+        entry.tag := r_tag
+        // if all the ways are valid, use the PLRU to select the way to replace;
+        // otherwise use PriorityEncoderOH to pick an invalid one
+        val wmask =
+          if (coreParams.nL2TLBWays > 1)
+            Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, coreParams.nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q))
+          else 1.U(1.W)
+
+        val mask = UIntToOH(r_idx)
+        for (way <- 0 until coreParams.nL2TLBWays) {
+          when(wmask(way)) {
+            valid(way) := 
valid(way) | mask + g(way) := Mux(r_pte.g, g(way) | mask, g(way) & ~mask) + } + } + // sfence happens + when(io.dpath.sfence.valid) { + val hg = usingHypervisor.B && io.dpath.sfence.bits.hg + for (way <- 0 until coreParams.nL2TLBWays) { + valid(way) := + Mux( + !hg && io.dpath.sfence.bits.rs1, + valid(way) & ~UIntToOH(io.dpath.sfence.bits.addr(idxBits + pgIdxBits - 1, pgIdxBits)), + Mux(!hg && io.dpath.sfence.bits.rs2, valid(way) & g(way), 0.U) + ) + } + } + + val s0_valid = !l2_refill && arb.io.out.fire + val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.need_gpa + val s1_valid = RegNext(s0_valid && s0_suitable && arb.io.out.bits.valid) + val s2_valid = RegNext(s1_valid) + // read from tlb idx + val s1_rdata = ram.readwritePorts.head.readData + val s2_rdata = s1_rdata.map(s1_rdway => code.decode(RegEnable(s1_rdway, s1_valid))) + val s2_valid_vec = RegEnable(r_valid_vec, s1_valid) + val s2_g_vec = RegEnable(VecInit(g.map(_(r_idx))), s1_valid) + val s2_error = VecInit((0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error)).asUInt.orR + when(s2_valid && s2_error) { valid.foreach { _ := 0.U } } + // ram connect + ram.readwritePorts.foreach { ramPort => + ramPort.enable := (l2_refill && !invalidated) || s0_valid + ramPort.isWrite := (l2_refill && !invalidated) + ramPort.address := Mux(l2_refill && !invalidated, r_idx, arb.io.out.bits.bits.addr(idxBits - 1, 0)) + ramPort.writeData := VecInit(Seq.fill(coreParams.nL2TLBWays)(code.encode(entry.asUInt))) + ramPort.mask.foreach(_ := VecInit(wmask.asBools)) + } + // decode + val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets, ppnBits, maxSVAddrBits, pgIdxBits, usingHypervisor))) + val s2_hit_vec = + (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag)) + val s2_hit = s2_valid && VecInit(s2_hit_vec).asUInt.orR + io.dpath.perf.l2miss := s2_valid && !(VecInit(s2_hit_vec).asUInt.orR) + io.dpath.perf.l2hit := s2_hit + when(s2_hit) { + l2_plru.access(r_idx, OHToUInt(s2_hit_vec)) + assert((PopCount(s2_hit_vec) === 1.U) || s2_error, "L2 TLB multi-hit") + } + + val s2_pte = Wire(new PTE) + val s2_hit_entry = Mux1H(s2_hit_vec, s2_entry_vec) + s2_pte.ppn := s2_hit_entry.ppn + s2_pte.d := s2_hit_entry.d + s2_pte.a := s2_hit_entry.a + s2_pte.g := Mux1H(s2_hit_vec, s2_g_vec) + s2_pte.u := s2_hit_entry.u + s2_pte.x := s2_hit_entry.x + s2_pte.w := s2_hit_entry.w + s2_pte.r := s2_hit_entry.r + s2_pte.v := true.B + s2_pte.reserved_for_future := 0.U + s2_pte.reserved_for_software := 0.U + +// for (way <- 0 until coreParams.nL2TLBWays) { +// ccover(s2_hit && s2_hit_vec(way), s"L2_TLB_HIT_WAY$way", s"L2 TLB hit way$way") +// } + + (s2_hit, s2_error, s2_pte, Some(ram)) + } + + // if SFENCE occurs during walk, don't refill PTE cache or L2 TLB until next walk + invalidated := io.dpath.sfence.valid || (invalidated && state =/= s_ready) + // mem request + io.mem.keep_clock_enabled := false.B + + io.mem.req.valid := state === s_req || state === s_dummy1 + io.mem.req.bits.phys := true.B + io.mem.req.bits.cmd := M_XRD + io.mem.req.bits.size := log2Ceil(xLen / 8).U + io.mem.req.bits.signed := false.B + io.mem.req.bits.addr := pte_addr + io.mem.req.bits.idx.foreach(_ := pte_addr) + io.mem.req.bits.dprv := PRV.S.U // PTW accesses are S-mode by definition + io.mem.req.bits.dv := do_both_stages && !stage2 + io.mem.req.bits.tag := DontCare + io.mem.req.bits.no_alloc := DontCare + io.mem.req.bits.no_xcpt := DontCare + 
io.mem.req.bits.data := DontCare + io.mem.req.bits.mask := DontCare + + io.mem.s1_kill := l2_hit || state =/= s_wait1 + io.mem.s1_data := DontCare + io.mem.s2_kill := false.B + + val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits) + require(!usingHypervisor || pageGranularityPMPs, s"hypervisor requires pmpGranularity >= ${1 << pgIdxBits}") + + val pmaPgLevelHomogeneous: Seq[Bool] = (0 until pgLevels).map { i => + val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)) + if (pageGranularityPMPs && i == pgLevels - 1) { + require( +// TLBPageLookup.homogeneous(edge.manager.managers, pgSize), + true, + s"All memory regions must be $pgSize-byte aligned" + ) + true.B + } else { +// TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous + true.B + } + } + val pmaHomogeneous = VecInit(pmaPgLevelHomogeneous)(count) + val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp).apply(r_pte.ppn << pgIdxBits, count) + val homogeneous = pmaHomogeneous && pmpHomogeneous + // response to tlb + for (i <- 0 until io.requestor.size) { + io.requestor(i).resp.valid := resp_valid(i) + io.requestor(i).resp.bits.ae_ptw := resp_ae_ptw + io.requestor(i).resp.bits.ae_final := resp_ae_final + io.requestor(i).resp.bits.pf := resp_pf + io.requestor(i).resp.bits.gf := resp_gf + io.requestor(i).resp.bits.hr := resp_hr + io.requestor(i).resp.bits.hw := resp_hw + io.requestor(i).resp.bits.hx := resp_hx + io.requestor(i).resp.bits.pte := r_pte + io.requestor(i).resp.bits.level := max_count + io.requestor(i).resp.bits.homogeneous := homogeneous || pageGranularityPMPs.B + io.requestor(i).resp.bits.fragmented_superpage := resp_fragmented_superpage && pageGranularityPMPs.B + io.requestor(i).resp.bits.gpa.valid := r_req.need_gpa + io.requestor(i).resp.bits.gpa.bits := + Cat( + Mux( + !stage2_final || !r_req.vstage1 || aux_count === (pgLevels - 1).U, + aux_pte.ppn, + VecInit(makeFragmentedSuperpagePPN(aux_pte.ppn))(aux_count) + ), + gpa_pgoff + ) + io.requestor(i).resp.bits.gpa_is_pte := !stage2_final + io.requestor(i).ptbr := io.dpath.ptbr + io.requestor(i).hgatp := io.dpath.hgatp + io.requestor(i).vsatp := io.dpath.vsatp +// io.requestor(i).customCSRs <> io.dpath.customCSRs + io.requestor(i).status := io.dpath.status + io.requestor(i).hstatus := io.dpath.hstatus + io.requestor(i).gstatus := io.dpath.gstatus + io.requestor(i).pmp := io.dpath.pmp + } + + // control state machine + val next_state = WireDefault(state) + state := OptimizationBarrier(next_state) + val do_switch = WireDefault(false.B) + + switch(state) { + is(s_ready) { + when(arb.io.out.fire) { + val satp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(satp) + val vsatp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(io.dpath.vsatp) + val hgatp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(io.dpath.hgatp) + val aux_ppn = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp.ppn, arb.io.out.bits.bits.addr) + + r_req := arb.io.out.bits.bits + r_req_dest := arb.io.chosen + next_state := Mux(arb.io.out.bits.valid, s_req, s_ready) + stage2 := arb.io.out.bits.bits.stage2 + stage2_final := arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.vstage1 + count := Mux(arb.io.out.bits.bits.stage2, hgatp_initial_count, satp_initial_count) + aux_count := Mux(arb.io.out.bits.bits.vstage1, vsatp_initial_count, 0.U) + aux_pte.ppn := aux_ppn + aux_ppn_hi.foreach { _ := aux_ppn >> aux_pte.ppn.getWidth } + aux_pte.reserved_for_future := 0.U + resp_ae_ptw := 
false.B
+          resp_ae_final := false.B
+          resp_pf := false.B
+          resp_gf := false.B
+          resp_hr := true.B
+          resp_hw := true.B
+          resp_hx := true.B
+          resp_fragmented_superpage := false.B
+          r_hgatp := io.dpath.hgatp
+
+          assert(!arb.io.out.bits.bits.need_gpa || arb.io.out.bits.bits.stage2)
+        }
+      }
+      is(s_req) {
+        when(stage2 && count === r_hgatp_initial_count) {
+          gpa_pgoff := Mux(aux_count === (pgLevels - 1).U, r_req.addr << log2Ceil(xLen / 8), stage2_pte_cache_addr)
+        }
+        // pte_cache hit
+        when(stage2_pte_cache_hit) {
+          aux_count := aux_count + 1.U
+          aux_pte.ppn := stage2_pte_cache_data
+          aux_ppn_hi.foreach { _ := 0.U }
+          aux_pte.reserved_for_future := 0.U
+          pte_hit := true.B
+        }.elsewhen(pte_cache_hit) {
+          count := count + 1.U
+          pte_hit := true.B
+        }.otherwise {
+          next_state := Mux(io.mem.req.ready, s_wait1, s_req)
+        }
+      }
+      is(s_wait1) {
+        // This Mux is for the l2_error case; the l2_hit && !l2_error case is overridden below
+        next_state := Mux(l2_hit, s_req, s_wait2)
+      }
+      is(s_wait2) {
+        next_state := s_wait3
+        io.dpath.perf.pte_miss := count < (pgLevels - 1).U
+        when(io.mem.s2_xcpt.ae.ld) {
+          resp_ae_ptw := true.B
+          next_state := s_ready
+          resp_valid(r_req_dest) := true.B
+        }
+      }
+      is(s_fragment_superpage) {
+        next_state := s_ready
+        resp_valid(r_req_dest) := true.B
+        when(!homogeneous) {
+          count := (pgLevels - 1).U
+          resp_fragmented_superpage := true.B
+        }
+        when(do_both_stages) {
+          resp_fragmented_superpage := true.B
+        }
+      }
+    }
+
+    val merged_pte = {
+      val superpage_masks = (0 until pgLevels).map(i =>
+        ((BigInt(1) << pte.ppn.getWidth) - (BigInt(1) << (pgLevels - 1 - i) * pgLevelBits)).U
+      )
+      val superpage_mask = VecInit(superpage_masks)(Mux(stage2_final, max_count, (pgLevels - 1).U))
+      val stage1_ppns = (0 until pgLevels - 1).map(i =>
+        Cat(
+          pte.ppn(pte.ppn.getWidth - 1, (pgLevels - i - 1) * pgLevelBits),
+          aux_pte.ppn((pgLevels - i - 1) * pgLevelBits - 1, 0)
+        )
+      ) :+ pte.ppn
+      val stage1_ppn = VecInit(stage1_ppns)(count)
+      makePTE(stage1_ppn & superpage_mask, aux_pte)
+    }
+
+    r_pte := OptimizationBarrier(
+      // l2tlb hit -> a leaf PTE (l2_pte) was found; respond to the L1TLB
+      Mux(
+        l2_hit && !l2_error,
+        l2_pte,
+        // S2 PTE-cache hit -> proceed to the next level of the walk; update r_pte from hgatp
+        Mux(
+          state === s_req && stage2_pte_cache_hit,
+          makeHypervisorRootPTE(r_hgatp, stage2_pte_cache_data, l2_pte),
+          // pte_cache hit -> a non-leaf PTE (pte_cache) was found; continue requesting mem
+          Mux(
+            state === s_req && pte_cache_hit,
+            makePTE(pte_cache_data, l2_pte),
+            // 2-stage translation
+            Mux(
+              do_switch,
+              makeHypervisorRootPTE(r_hgatp, pte.ppn, r_pte),
+              // when mem responds, store mem.resp.pte
+              Mux(
+                mem_resp_valid,
+                Mux(!traverse && r_req.vstage1 && stage2, merged_pte, pte),
+                // fragment_superpage
+                Mux(
+                  state === s_fragment_superpage && !homogeneous && count =/= (pgLevels - 1).U,
+                  makePTE(VecInit(makeFragmentedSuperpagePPN(r_pte.ppn))(count), r_pte),
+                  // when a TLB request arrives, request mem using the root address in satp (or vsatp/hgatp)
+                  Mux(
+                    arb.io.out.fire,
+                    Mux(
+                      arb.io.out.bits.bits.stage2,
+                      makeHypervisorRootPTE(io.dpath.hgatp, io.dpath.vsatp.ppn, r_pte),
+                      makePTE(satp.ppn, r_pte)
+                    ),
+                    r_pte
+                  )
+                )
+              )
+            )
+          )
+        )
+      )
+    )
+
+    when(l2_hit && !l2_error) {
+      assert(state === s_req || state === s_wait1)
+      next_state := s_ready
+      resp_valid(r_req_dest) := true.B
+      count := (pgLevels - 1).U
+    }
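+    // Classification of a returned leaf PTE (see the privileged spec and the code below):
+    //   ae: the PPN points outside the physical address space (access exception)
+    //   pf: reserved-for-future bits are set (page fault)
+    //   gf: in stage-2, the guest PTE is not user-readable (guest-page fault)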
+    when(mem_resp_valid) {
+      assert(state === s_wait3)
+      next_state := s_req
+      when(traverse) {
+        when(do_both_stages && !stage2) { do_switch := true.B }
+        count := count + 1.U
+      }.otherwise {
+        val gf = stage2 && !stage2_final && !PTE.ur(pte)
+        val ae = pte.v && invalid_paddr
+        val pf = pte.v && pte.reserved_for_future =/= 0.U
+        val success = pte.v && !ae && !pf && !gf
+
+        when(do_both_stages && !stage2_final && success) {
+          when(stage2) {
+            stage2 := false.B
+            count := aux_count
+          }.otherwise {
+            stage2_final := true.B
+            do_switch := true.B
+          }
+        }.otherwise {
+          // a leaf PTE was found; start the l2 refill
+          l2_refill := success && count === (pgLevels - 1).U && !r_req.need_gpa &&
+            (!r_req.vstage1 && !r_req.stage2 ||
+              do_both_stages && aux_count === (pgLevels - 1).U && PTE.isFullPerm(pte))
+          count := max_count
+
+          when(
+            pageGranularityPMPs.B && !(count === (pgLevels - 1).U && (!do_both_stages || aux_count === (pgLevels - 1).U))
+          ) {
+            next_state := s_fragment_superpage
+          }.otherwise {
+            next_state := s_ready
+            resp_valid(r_req_dest) := true.B
+          }
+
+          resp_ae_ptw := ae && count < (pgLevels - 1).U && PTE.table(pte)
+          resp_ae_final := ae
+          resp_pf := pf && !stage2
+          resp_gf := gf || (pf && stage2)
+          resp_hr := !stage2 || (!pf && !gf && PTE.ur(pte))
+          resp_hw := !stage2 || (!pf && !gf && PTE.uw(pte))
+          resp_hx := !stage2 || (!pf && !gf && PTE.ux(pte))
+        }
+      }
+    }
+    when(io.mem.s2_nack) {
+      assert(state === s_wait2)
+      next_state := s_req
+    }
+
+    when(do_switch) {
+      aux_count := Mux(traverse, count + 1.U, count)
+      count := r_hgatp_initial_count
+      aux_pte := Mux(
+        traverse,
+        pte, {
+          val s1_ppns = VecInit(
+            (0 until pgLevels - 1).map(i =>
+              Cat(
+                pte.ppn(pte.ppn.getWidth - 1, (pgLevels - i - 1) * pgLevelBits),
+                padTo(
+                  r_req
+                    .addr((((pgLevels - i - 1) * pgLevelBits).min(vpnBits)) - 1, 0),
+                  ((pgLevels - i - 1) * pgLevelBits)
+                )
+              )
+            ) :+ pte.ppn
+          )
+          makePTE(s1_ppns(count), pte)
+        }
+      )
+      aux_ppn_hi.foreach { _ := 0.U }
+      stage2 := true.B
+    }
+  } // leaving gated-clock domain
+
+  /** Replace PTE.ppn with ppn */
+  private def makePTE(ppn: UInt, default: PTE) = {
+    val pte = WireDefault(default)
+    pte.ppn := ppn
+    pte
+  }
+
+  /** use hgatp and the vpn to construct a new ppn */
+  private def makeHypervisorRootPTE(hgatp: PTBR, vpn: UInt, default: PTE) = {
+    val count = pgLevels.U - minPgLevels.U - additionalPgLevels(hgatp)
+    val idxs = VecInit((0 to pgLevels - minPgLevels).map(i => (vpn >> (pgLevels - i) * pgLevelBits)))
+    val lsbs = WireDefault(UInt(maxHypervisorExtraAddrBits.W), idxs(count))
+    val pte = WireDefault(default)
+    pte.ppn := Cat(hgatp.ppn >> maxHypervisorExtraAddrBits, lsbs)
+    pte
+  }
+}
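+
+// A rough usage sketch (hypothetical parameter values, not a tested configuration):
+// an Sv39 walker with a 256-entry, 4-way L2 TLB and an 8-entry PTE cache.
+//
+//   val ptw = Module(new PTW(PTWParameter(
+//     useAsyncReset = false, hasClockGate = true,
+//     usingVM = true, usingHypervisor = false,
+//     xLen = 64, fLen = 64, paddrBits = 32, asidBits = 0, pgLevels = 3,
+//     nPTECacheEntries = 8, nL2TLBWays = 4, nL2TLBEntries = 256, nPMPs = 8
+//   )))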
diff --git a/rocketv/src/PipelinedMultiplier.scala b/rocketv/src/PipelinedMultiplier.scala
new file mode 100644
index 000000000..c4e03237b
--- /dev/null
+++ b/rocketv/src/PipelinedMultiplier.scala
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California
+// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu
+package org.chipsalliance.rocketv
+
+import chisel3._
+import chisel3.experimental.hierarchy.instantiable
+import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
+import chisel3.util._
+
+object PipelinedMultiplierParameter {
+  implicit def rwP: upickle.default.ReadWriter[PipelinedMultiplierParameter] =
+    upickle.default.macroRW[PipelinedMultiplierParameter]
+}
+
+case class PipelinedMultiplierParameter(
+  useAsyncReset: Boolean,
+  latency: Int,
+  xLen: Int)
+  extends SerializableModuleParameter {
+
+  val nXpr: Int = 32
+  val uopWidth: Int = 4
+
+  def FN_MUL = 0.U(4.W)
+  def FN_MULH = 1.U(4.W)
+  def FN_MULHSU = 2.U(4.W)
+  def FN_MULHU = 3.U(4.W)
+
+  def DW_32 = false.B
+  def DW_64 = true.B
+}
+class PipelinedMultiplierInterface(parameter: PipelinedMultiplierParameter) extends Bundle {
+  val clock = Input(Clock())
+  val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool())
+  val req = Flipped(Valid(new MultiplierReq(parameter.xLen, log2Ceil(parameter.nXpr), parameter.uopWidth)))
+  val resp = Valid(new MultiplierResp(parameter.xLen, log2Ceil(parameter.nXpr)))
+}
+
+@instantiable
+class PipelinedMultiplier(val parameter: PipelinedMultiplierParameter)
+  extends FixedIORawModule(new PipelinedMultiplierInterface(parameter))
+    with SerializableModule[PipelinedMultiplierParameter]
+    with ImplicitClock
+    with ImplicitReset {
+  override protected def implicitClock: Clock = io.clock
+  override protected def implicitReset: Reset = io.reset
+
+  val width = parameter.xLen
+  val latency = parameter.latency
+  def N = BitPat.N()
+  def Y = BitPat.Y()
+  def X = BitPat.dontCare(1)
+  def sextTo(x: UInt, n: Int): UInt = {
+    require(x.getWidth <= n)
+    if (x.getWidth == n) x
+    else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x)
+  }
+
+  val in = Pipe(io.req)
+
+  val decode = List(
+    parameter.FN_MUL -> List(N, X, X),
+    parameter.FN_MULH -> List(Y, Y, Y),
+    parameter.FN_MULHU -> List(Y, N, N),
+    parameter.FN_MULHSU -> List(Y, Y, N)
+  )
+  // TODO: move this decoding to the Decoder.
+  val cmdHi :: lhsSigned :: rhsSigned :: Nil =
+    DecodeLogic(in.bits.fn, List(X, X, X), decode).map(_.asBool)
+  val cmdHalf = (width > 32).B && in.bits.dw === parameter.DW_32
+
+  val lhs = Cat(lhsSigned && in.bits.in1(width - 1), in.bits.in1).asSInt
+  val rhs = Cat(rhsSigned && in.bits.in2(width - 1), in.bits.in2).asSInt
+  val prod = lhs * rhs
+  val muxed =
+    Mux(cmdHi, prod(2 * width - 1, width), Mux(cmdHalf, sextTo(prod(width / 2 - 1, 0), width), prod(width - 1, 0)))
+
+  val resp = Pipe(in, latency - 1)
+  io.resp.valid := resp.valid
+  io.resp.bits.tag := resp.bits.tag
+  io.resp.bits.data := Pipe(in.valid, muxed, latency - 1).bits
+  io.resp.bits.full_data := Pipe(in.valid, prod, latency - 1).bits.asUInt
+}
diff --git a/rocketv/src/PopCountAtLeast.scala b/rocketv/src/PopCountAtLeast.scala
new file mode 100644
index 000000000..dc253e325
--- /dev/null
+++ b/rocketv/src/PopCountAtLeast.scala
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California
+// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu
+package org.chipsalliance.rocketv
+
+import chisel3._
+import chisel3.util.PopCount
+
+// TODO: upstream these utilities
+object PopCountAtLeast {
+  private def two(x: UInt): (Bool, Bool) = x.getWidth match {
+    case 1 => (x.asBool, false.B)
+    case n =>
+      val half = x.getWidth / 2
+      val (leftOne, leftTwo) = two(x(half - 1, 0))
+      val (rightOne, rightTwo) = two(x(x.getWidth - 1, half))
+      (leftOne || rightOne, leftTwo || rightTwo || (leftOne && rightOne))
+  }
+  def apply(x: UInt, n: Int): Bool = n match {
+    case 0 => true.B
+    case 1 => x.orR
+    case 2 => two(x)._2
+    case 3 => PopCount(x) >= n.U
+  }
+}
diff --git a/rocketv/src/RVCExpander.scala b/rocketv/src/RVCExpander.scala
new file mode 100644
index 000000000..b0e5b0795
--- /dev/null
+++ b/rocketv/src/RVCExpander.scala
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu
+package org.chipsalliance.rocketv
+
+import chisel3._
+import chisel3.experimental.hierarchy.instantiable
+import 
chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +// TODO: add a clear documentation on this... +class RVCDecoder(x: UInt, xLen: Int, useAddiForMv: Boolean = false) { + def inst(bits: UInt, rd: UInt = x(11, 7), rs1: UInt = x(19, 15), rs2: UInt = x(24, 20), rs3: UInt = x(31, 27)) = { + val res = Wire(new ExpandedInstruction) + res.bits := bits + res.rd := rd + res.rs1 := rs1 + res.rs2 := rs2 + res.rs3 := rs3 + res + } + + def rs1p = Cat(1.U(2.W), x(9, 7)) + def rs2p = Cat(1.U(2.W), x(4, 2)) + def rs2 = x(6, 2) + def rd = x(11, 7) + def addi4spnImm = Cat(x(10, 7), x(12, 11), x(5), x(6), 0.U(2.W)) + def lwImm = Cat(x(5), x(12, 10), x(6), 0.U(2.W)) + def ldImm = Cat(x(6, 5), x(12, 10), 0.U(3.W)) + def lwspImm = Cat(x(3, 2), x(12), x(6, 4), 0.U(2.W)) + def ldspImm = Cat(x(4, 2), x(12), x(6, 5), 0.U(3.W)) + def swspImm = Cat(x(8, 7), x(12, 9), 0.U(2.W)) + def sdspImm = Cat(x(9, 7), x(12, 10), 0.U(3.W)) + def luiImm = Cat(Fill(15, x(12)), x(6, 2), 0.U(12.W)) + def addi16spImm = Cat(Fill(3, x(12)), x(4, 3), x(5), x(2), x(6), 0.U(4.W)) + def addiImm = Cat(Fill(7, x(12)), x(6, 2)) + def jImm = Cat(Fill(10, x(12)), x(8), x(10, 9), x(6), x(7), x(2), x(11), x(5, 3), 0.U(1.W)) + def bImm = Cat(Fill(5, x(12)), x(6, 5), x(2), x(11, 10), x(4, 3), 0.U(1.W)) + def shamt = Cat(x(12), x(6, 2)) + def x0 = 0.U(5.W) + def ra = 1.U(5.W) + def sp = 2.U(5.W) + + def q0 = { + def addi4spn = { + val opc = Mux(x(12, 5).orR, 0x13.U(7.W), 0x1f.U(7.W)) + inst(Cat(addi4spnImm, sp, 0.U(3.W), rs2p, opc), rs2p, sp, rs2p) + } + def ld = inst(Cat(ldImm, rs1p, 3.U(3.W), rs2p, 0x03.U(7.W)), rs2p, rs1p, rs2p) + def lw = inst(Cat(lwImm, rs1p, 2.U(3.W), rs2p, 0x03.U(7.W)), rs2p, rs1p, rs2p) + def fld = inst(Cat(ldImm, rs1p, 3.U(3.W), rs2p, 0x07.U(7.W)), rs2p, rs1p, rs2p) + def flw = { + if (xLen == 32) inst(Cat(lwImm, rs1p, 2.U(3.W), rs2p, 0x07.U(7.W)), rs2p, rs1p, rs2p) + else ld + } + def unimp = inst(Cat(lwImm >> 5, rs2p, rs1p, 2.U(3.W), lwImm(4, 0), 0x3f.U(7.W)), rs2p, rs1p, rs2p) + def sd = inst(Cat(ldImm >> 5, rs2p, rs1p, 3.U(3.W), ldImm(4, 0), 0x23.U(7.W)), rs2p, rs1p, rs2p) + def sw = inst(Cat(lwImm >> 5, rs2p, rs1p, 2.U(3.W), lwImm(4, 0), 0x23.U(7.W)), rs2p, rs1p, rs2p) + def fsd = inst(Cat(ldImm >> 5, rs2p, rs1p, 3.U(3.W), ldImm(4, 0), 0x27.U(7.W)), rs2p, rs1p, rs2p) + def fsw = { + if (xLen == 32) inst(Cat(lwImm >> 5, rs2p, rs1p, 2.U(3.W), lwImm(4, 0), 0x27.U(7.W)), rs2p, rs1p, rs2p) + else sd + } + Seq(addi4spn, fld, lw, flw, unimp, fsd, sw, fsw) + } + + def q1 = { + def addi = inst(Cat(addiImm, rd, 0.U(3.W), rd, 0x13.U(7.W)), rd, rd, rs2p) + def addiw = { + val opc = Mux(rd.orR, 0x1b.U(7.W), 0x1f.U(7.W)) + inst(Cat(addiImm, rd, 0.U(3.W), rd, opc), rd, rd, rs2p) + } + def jal = { + if (xLen == 32) inst(Cat(jImm(20), jImm(10, 1), jImm(11), jImm(19, 12), ra, 0x6f.U(7.W)), ra, rd, rs2p) + else addiw + } + def li = inst(Cat(addiImm, x0, 0.U(3.W), rd, 0x13.U(7.W)), rd, x0, rs2p) + def addi16sp = { + val opc = Mux(addiImm.orR, 0x13.U(7.W), 0x1f.U(7.W)) + inst(Cat(addi16spImm, rd, 0.U(3.W), rd, opc), rd, rd, rs2p) + } + def lui = { + val opc = Mux(addiImm.orR, 0x37.U(7.W), 0x3f.U(7.W)) + val me = inst(Cat(luiImm(31, 12), rd, opc), rd, rd, rs2p) + Mux(rd === x0 || rd === sp, addi16sp, me) + } + def j = inst(Cat(jImm(20), jImm(10, 1), jImm(11), jImm(19, 12), x0, 0x6f.U(7.W)), x0, rs1p, rs2p) + def beqz = inst(Cat(bImm(12), bImm(10, 5), x0, rs1p, 0.U(3.W), bImm(4, 1), bImm(11), 0x63.U(7.W)), rs1p, rs1p, x0) + def bnez = inst(Cat(bImm(12), bImm(10, 5), x0, rs1p, 1.U(3.W), bImm(4, 1), 
bImm(11), 0x63.U(7.W)), x0, rs1p, x0) + def arith = { + def srli = Cat(shamt, rs1p, 5.U(3.W), rs1p, 0x13.U(7.W)) + def srai = srli | (1 << 30).U + def andi = Cat(addiImm, rs1p, 7.U(3.W), rs1p, 0x13.U(7.W)) + def rtype = { + val funct = VecInit(Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U))(Cat(x(12), x(6, 5))) + val sub = Mux(x(6, 5) === 0.U, (1 << 30).U, 0.U) + val opc = Mux(x(12), 0x3b.U(7.W), 0x33.U(7.W)) + Cat(rs2p, rs1p, funct, rs1p, opc) | sub + } + inst(VecInit(Seq(srli, srai, andi, rtype))(x(11, 10)), rs1p, rs1p, rs2p) + } + Seq(addi, jal, li, lui, arith, j, beqz, bnez) + } + + def q2 = { + val load_opc = Mux(rd.orR, 0x03.U(7.W), 0x1f.U(7.W)) + def slli = inst(Cat(shamt, rd, 1.U(3.W), rd, 0x13.U(7.W)), rd, rd, rs2) + def ldsp = inst(Cat(ldspImm, sp, 3.U(3.W), rd, load_opc), rd, sp, rs2) + def lwsp = inst(Cat(lwspImm, sp, 2.U(3.W), rd, load_opc), rd, sp, rs2) + def fldsp = inst(Cat(ldspImm, sp, 3.U(3.W), rd, 0x07.U(7.W)), rd, sp, rs2) + def flwsp = { + if (xLen == 32) inst(Cat(lwspImm, sp, 2.U(3.W), rd, 0x07.U(7.W)), rd, sp, rs2) + else ldsp + } + def sdsp = inst(Cat(sdspImm >> 5, rs2, sp, 3.U(3.W), sdspImm(4, 0), 0x23.U(7.W)), rd, sp, rs2) + def swsp = inst(Cat(swspImm >> 5, rs2, sp, 2.U(3.W), swspImm(4, 0), 0x23.U(7.W)), rd, sp, rs2) + def fsdsp = inst(Cat(sdspImm >> 5, rs2, sp, 3.U(3.W), sdspImm(4, 0), 0x27.U(7.W)), rd, sp, rs2) + def fswsp = { + if (xLen == 32) inst(Cat(swspImm >> 5, rs2, sp, 2.U(3.W), swspImm(4, 0), 0x27.U(7.W)), rd, sp, rs2) + else sdsp + } + def jalr = { + val mv = { + if (useAddiForMv) inst(Cat(rs2, 0.U(3.W), rd, 0x13.U(7.W)), rd, rs2, x0) + else inst(Cat(rs2, x0, 0.U(3.W), rd, 0x33.U(7.W)), rd, x0, rs2) + } + val add = inst(Cat(rs2, rd, 0.U(3.W), rd, 0x33.U(7.W)), rd, rd, rs2) + val jr = Cat(rs2, rd, 0.U(3.W), x0, 0x67.U(7.W)) + val reserved = Cat(jr >> 7, 0x1f.U(7.W)) + val jr_reserved = inst(Mux(rd.orR, jr, reserved), x0, rd, rs2) + val jr_mv = Mux(rs2.orR, mv, jr_reserved) + val jalr = Cat(rs2, rd, 0.U(3.W), ra, 0x67.U(7.W)) + val ebreak = Cat(jr >> 7, 0x73.U(7.W)) | (1 << 20).U + val jalr_ebreak = inst(Mux(rd.orR, jalr, ebreak), ra, rd, rs2) + val jalr_add = Mux(rs2.orR, add, jalr_ebreak) + Mux(x(12), jalr_add, jr_mv) + } + Seq(slli, fldsp, lwsp, flwsp, jalr, fsdsp, swsp, fswsp) + } + + def q3 = Seq.fill(8)(passthrough) + + def passthrough = inst(x) + + def decode = { + val s = q0 ++ q1 ++ q2 ++ q3 + VecInit(s)(Cat(x(1, 0), x(15, 13))) + } +} + +object RVCExpanderParameter { + implicit def rwP: upickle.default.ReadWriter[RVCExpanderParameter] = upickle.default.macroRW[RVCExpanderParameter] +} + +case class RVCExpanderParameter( + xLen: Int, + usingCompressed: Boolean) + extends SerializableModuleParameter { + val useAddiForMv: Boolean = false +} + +class RVCExpanderInterface(parameter: RVCExpanderParameter) extends Bundle { + val in = Input(UInt(32.W)) + val out = Output(new ExpandedInstruction) + val rvc = Output(Bool()) +} + +@instantiable +class RVCExpander(val parameter: RVCExpanderParameter) + extends FixedIORawModule(new RVCExpanderInterface(parameter)) + with SerializableModule[RVCExpanderParameter] { + val usingCompressed = parameter.usingCompressed + val useAddiForMv = parameter.useAddiForMv + val xLen = parameter.xLen + if (usingCompressed) { + io.rvc := io.in(1, 0) =/= 3.U + io.out := new RVCDecoder(io.in, xLen, useAddiForMv).decode + } else { + io.rvc := false.B + io.out := new RVCDecoder(io.in, xLen, useAddiForMv).passthrough + } +} diff --git a/rocketv/src/RVDecoderDB.scala b/rocketv/src/RVDecoderDB.scala new file mode 100644 index 
000000000..8b0d3387f --- /dev/null +++ b/rocketv/src/RVDecoderDB.scala @@ -0,0 +1,949 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +// The compatibility layer to bridge rvdecoderdb and codegen instructions. +// In the future, this file is going to be removed. +object rvdecoderdbcompat { + val rvdecoderdbPath = org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + + val causes = org.chipsalliance.rvdecoderdb.causes(rvdecoderdbPath) + object Causes { + val misaligned_fetch = causes("misaligned fetch") + val fetch_access = causes("fetch access") + val illegal_instruction = causes("illegal instruction") + val breakpoint = causes("breakpoint") + val misaligned_load = causes("misaligned load") + val load_access = causes("load access") + val misaligned_store = causes("misaligned store") + val store_access = causes("store access") + val user_ecall = causes("user ecall") + val supervisor_ecall = causes("supervisor ecall") + val virtual_supervisor_ecall = causes("virtual supervisor ecall") + val machine_ecall = causes("machine ecall") + val fetch_page_fault = causes("fetch page fault") + val load_page_fault = causes("load page fault") + val store_page_fault = causes("store page fault") + val fetch_guest_page_fault = causes("fetch guest page fault") + val load_guest_page_fault = causes("load guest page fault") + val virtual_instruction = causes("virtual instruction") + val store_guest_page_fault = causes("store guest page fault") + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += misaligned_fetch + res += fetch_access + res += illegal_instruction + res += breakpoint + res += misaligned_load + res += load_access + res += misaligned_store + res += store_access + res += user_ecall + res += supervisor_ecall + res += virtual_supervisor_ecall + res += machine_ecall + res += fetch_page_fault + res += load_page_fault + res += store_page_fault + res += fetch_guest_page_fault + res += load_guest_page_fault + res += virtual_instruction + res += store_guest_page_fault + res.toArray + } + } + + val csrs = org.chipsalliance.rvdecoderdb.csrs(rvdecoderdbPath).toMap + val csrs32 = org.chipsalliance.rvdecoderdb.csrs(rvdecoderdbPath).toMap + object CSRs { + val fflags = csrs("fflags") + val frm = csrs("frm") + val fcsr = csrs("fcsr") + val vstart = csrs("vstart") + val vxsat = csrs("vxsat") + val vxrm = csrs("vxrm") + val vcsr = csrs("vcsr") + val seed = csrs("seed") + val jvt = csrs("jvt") + val cycle = csrs("cycle") + val time = csrs("time") + val instret = csrs("instret") + val hpmcounter3 = csrs("hpmcounter3") + val hpmcounter4 = csrs("hpmcounter4") + val hpmcounter5 = csrs("hpmcounter5") + val hpmcounter6 = csrs("hpmcounter6") + val hpmcounter7 = csrs("hpmcounter7") + val hpmcounter8 = csrs("hpmcounter8") + val hpmcounter9 = csrs("hpmcounter9") + val hpmcounter10 = csrs("hpmcounter10") + val hpmcounter11 = csrs("hpmcounter11") + val hpmcounter12 = csrs("hpmcounter12") + val hpmcounter13 = csrs("hpmcounter13") + val hpmcounter14 = csrs("hpmcounter14") + val hpmcounter15 = csrs("hpmcounter15") + val hpmcounter16 = csrs("hpmcounter16") + val hpmcounter17 = csrs("hpmcounter17") + val hpmcounter18 = csrs("hpmcounter18") + val hpmcounter19 = csrs("hpmcounter19") + val hpmcounter20 = csrs("hpmcounter20") + val hpmcounter21 = csrs("hpmcounter21") + val hpmcounter22 = 
csrs("hpmcounter22") + val hpmcounter23 = csrs("hpmcounter23") + val hpmcounter24 = csrs("hpmcounter24") + val hpmcounter25 = csrs("hpmcounter25") + val hpmcounter26 = csrs("hpmcounter26") + val hpmcounter27 = csrs("hpmcounter27") + val hpmcounter28 = csrs("hpmcounter28") + val hpmcounter29 = csrs("hpmcounter29") + val hpmcounter30 = csrs("hpmcounter30") + val hpmcounter31 = csrs("hpmcounter31") + val vl = csrs("vl") + val vtype = csrs("vtype") + val vlenb = csrs("vlenb") + val sstatus = csrs("sstatus") + val sedeleg = csrs("sedeleg") + val sideleg = csrs("sideleg") + val sie = csrs("sie") + val stvec = csrs("stvec") + val scounteren = csrs("scounteren") + val senvcfg = csrs("senvcfg") + val sstateen0 = csrs("sstateen0") + val sstateen1 = csrs("sstateen1") + val sstateen2 = csrs("sstateen2") + val sstateen3 = csrs("sstateen3") + val sscratch = csrs("sscratch") + val sepc = csrs("sepc") + val scause = csrs("scause") + val stval = csrs("stval") + val sip = csrs("sip") + val stimecmp = csrs("stimecmp") + val siselect = csrs("siselect") + val sireg = csrs("sireg") + val stopei = csrs("stopei") + val satp = csrs("satp") + val scontext = csrs("scontext") + val vsstatus = csrs("vsstatus") + val vsie = csrs("vsie") + val vstvec = csrs("vstvec") + val vsscratch = csrs("vsscratch") + val vsepc = csrs("vsepc") + val vscause = csrs("vscause") + val vstval = csrs("vstval") + val vsip = csrs("vsip") + val vstimecmp = csrs("vstimecmp") + val vsiselect = csrs("vsiselect") + val vsireg = csrs("vsireg") + val vstopei = csrs("vstopei") + val vsatp = csrs("vsatp") + val hstatus = csrs("hstatus") + val hedeleg = csrs("hedeleg") + val hideleg = csrs("hideleg") + val hie = csrs("hie") + val htimedelta = csrs("htimedelta") + val hcounteren = csrs("hcounteren") + val hgeie = csrs("hgeie") + val hvien = csrs("hvien") + val hvictl = csrs("hvictl") + val henvcfg = csrs("henvcfg") + val hstateen0 = csrs("hstateen0") + val hstateen1 = csrs("hstateen1") + val hstateen2 = csrs("hstateen2") + val hstateen3 = csrs("hstateen3") + val htval = csrs("htval") + val hip = csrs("hip") + val hvip = csrs("hvip") + val hviprio1 = csrs("hviprio1") + val hviprio2 = csrs("hviprio2") + val htinst = csrs("htinst") + val hgatp = csrs("hgatp") + val hcontext = csrs("hcontext") + val hgeip = csrs("hgeip") + val vstopi = csrs("vstopi") + val scountovf = csrs("scountovf") + val stopi = csrs("stopi") + val utvt = csrs("utvt") + val unxti = csrs("unxti") + val uintstatus = csrs("uintstatus") + val uscratchcsw = csrs("uscratchcsw") + val uscratchcswl = csrs("uscratchcswl") + val stvt = csrs("stvt") + val snxti = csrs("snxti") + val sintstatus = csrs("sintstatus") + val sscratchcsw = csrs("sscratchcsw") + val sscratchcswl = csrs("sscratchcswl") + val mtvt = csrs("mtvt") + val mnxti = csrs("mnxti") + val mintstatus = csrs("mintstatus") + val mscratchcsw = csrs("mscratchcsw") + val mscratchcswl = csrs("mscratchcswl") + val mstatus = csrs("mstatus") + val misa = csrs("misa") + val medeleg = csrs("medeleg") + val mideleg = csrs("mideleg") + val mie = csrs("mie") + val mtvec = csrs("mtvec") + val mcounteren = csrs("mcounteren") + val mvien = csrs("mvien") + val mvip = csrs("mvip") + val menvcfg = csrs("menvcfg") + val mstateen0 = csrs("mstateen0") + val mstateen1 = csrs("mstateen1") + val mstateen2 = csrs("mstateen2") + val mstateen3 = csrs("mstateen3") + val mcountinhibit = csrs("mcountinhibit") + val mscratch = csrs("mscratch") + val mepc = csrs("mepc") + val mcause = csrs("mcause") + val mtval = csrs("mtval") + val mip = csrs("mip") + val mtinst = 
csrs("mtinst") + val mtval2 = csrs("mtval2") + val miselect = csrs("miselect") + val mireg = csrs("mireg") + val mtopei = csrs("mtopei") + val pmpcfg0 = csrs("pmpcfg0") + val pmpcfg1 = csrs("pmpcfg1") + val pmpcfg2 = csrs("pmpcfg2") + val pmpcfg3 = csrs("pmpcfg3") + val pmpcfg4 = csrs("pmpcfg4") + val pmpcfg5 = csrs("pmpcfg5") + val pmpcfg6 = csrs("pmpcfg6") + val pmpcfg7 = csrs("pmpcfg7") + val pmpcfg8 = csrs("pmpcfg8") + val pmpcfg9 = csrs("pmpcfg9") + val pmpcfg10 = csrs("pmpcfg10") + val pmpcfg11 = csrs("pmpcfg11") + val pmpcfg12 = csrs("pmpcfg12") + val pmpcfg13 = csrs("pmpcfg13") + val pmpcfg14 = csrs("pmpcfg14") + val pmpcfg15 = csrs("pmpcfg15") + val pmpaddr0 = csrs("pmpaddr0") + val pmpaddr1 = csrs("pmpaddr1") + val pmpaddr2 = csrs("pmpaddr2") + val pmpaddr3 = csrs("pmpaddr3") + val pmpaddr4 = csrs("pmpaddr4") + val pmpaddr5 = csrs("pmpaddr5") + val pmpaddr6 = csrs("pmpaddr6") + val pmpaddr7 = csrs("pmpaddr7") + val pmpaddr8 = csrs("pmpaddr8") + val pmpaddr9 = csrs("pmpaddr9") + val pmpaddr10 = csrs("pmpaddr10") + val pmpaddr11 = csrs("pmpaddr11") + val pmpaddr12 = csrs("pmpaddr12") + val pmpaddr13 = csrs("pmpaddr13") + val pmpaddr14 = csrs("pmpaddr14") + val pmpaddr15 = csrs("pmpaddr15") + val pmpaddr16 = csrs("pmpaddr16") + val pmpaddr17 = csrs("pmpaddr17") + val pmpaddr18 = csrs("pmpaddr18") + val pmpaddr19 = csrs("pmpaddr19") + val pmpaddr20 = csrs("pmpaddr20") + val pmpaddr21 = csrs("pmpaddr21") + val pmpaddr22 = csrs("pmpaddr22") + val pmpaddr23 = csrs("pmpaddr23") + val pmpaddr24 = csrs("pmpaddr24") + val pmpaddr25 = csrs("pmpaddr25") + val pmpaddr26 = csrs("pmpaddr26") + val pmpaddr27 = csrs("pmpaddr27") + val pmpaddr28 = csrs("pmpaddr28") + val pmpaddr29 = csrs("pmpaddr29") + val pmpaddr30 = csrs("pmpaddr30") + val pmpaddr31 = csrs("pmpaddr31") + val pmpaddr32 = csrs("pmpaddr32") + val pmpaddr33 = csrs("pmpaddr33") + val pmpaddr34 = csrs("pmpaddr34") + val pmpaddr35 = csrs("pmpaddr35") + val pmpaddr36 = csrs("pmpaddr36") + val pmpaddr37 = csrs("pmpaddr37") + val pmpaddr38 = csrs("pmpaddr38") + val pmpaddr39 = csrs("pmpaddr39") + val pmpaddr40 = csrs("pmpaddr40") + val pmpaddr41 = csrs("pmpaddr41") + val pmpaddr42 = csrs("pmpaddr42") + val pmpaddr43 = csrs("pmpaddr43") + val pmpaddr44 = csrs("pmpaddr44") + val pmpaddr45 = csrs("pmpaddr45") + val pmpaddr46 = csrs("pmpaddr46") + val pmpaddr47 = csrs("pmpaddr47") + val pmpaddr48 = csrs("pmpaddr48") + val pmpaddr49 = csrs("pmpaddr49") + val pmpaddr50 = csrs("pmpaddr50") + val pmpaddr51 = csrs("pmpaddr51") + val pmpaddr52 = csrs("pmpaddr52") + val pmpaddr53 = csrs("pmpaddr53") + val pmpaddr54 = csrs("pmpaddr54") + val pmpaddr55 = csrs("pmpaddr55") + val pmpaddr56 = csrs("pmpaddr56") + val pmpaddr57 = csrs("pmpaddr57") + val pmpaddr58 = csrs("pmpaddr58") + val pmpaddr59 = csrs("pmpaddr59") + val pmpaddr60 = csrs("pmpaddr60") + val pmpaddr61 = csrs("pmpaddr61") + val pmpaddr62 = csrs("pmpaddr62") + val pmpaddr63 = csrs("pmpaddr63") + val mseccfg = csrs("mseccfg") + val tselect = csrs("tselect") + val tdata1 = csrs("tdata1") + val tdata2 = csrs("tdata2") + val tdata3 = csrs("tdata3") + val tinfo = csrs("tinfo") + val tcontrol = csrs("tcontrol") + val mcontext = csrs("mcontext") + val mscontext = csrs("mscontext") + val dcsr = csrs("dcsr") + val dpc = csrs("dpc") + val dscratch0 = csrs("dscratch0") + val dscratch1 = csrs("dscratch1") + val mcycle = csrs("mcycle") + val minstret = csrs("minstret") + val mhpmcounter3 = csrs("mhpmcounter3") + val mhpmcounter4 = csrs("mhpmcounter4") + val mhpmcounter5 = csrs("mhpmcounter5") + val 
mhpmcounter6 = csrs("mhpmcounter6") + val mhpmcounter7 = csrs("mhpmcounter7") + val mhpmcounter8 = csrs("mhpmcounter8") + val mhpmcounter9 = csrs("mhpmcounter9") + val mhpmcounter10 = csrs("mhpmcounter10") + val mhpmcounter11 = csrs("mhpmcounter11") + val mhpmcounter12 = csrs("mhpmcounter12") + val mhpmcounter13 = csrs("mhpmcounter13") + val mhpmcounter14 = csrs("mhpmcounter14") + val mhpmcounter15 = csrs("mhpmcounter15") + val mhpmcounter16 = csrs("mhpmcounter16") + val mhpmcounter17 = csrs("mhpmcounter17") + val mhpmcounter18 = csrs("mhpmcounter18") + val mhpmcounter19 = csrs("mhpmcounter19") + val mhpmcounter20 = csrs("mhpmcounter20") + val mhpmcounter21 = csrs("mhpmcounter21") + val mhpmcounter22 = csrs("mhpmcounter22") + val mhpmcounter23 = csrs("mhpmcounter23") + val mhpmcounter24 = csrs("mhpmcounter24") + val mhpmcounter25 = csrs("mhpmcounter25") + val mhpmcounter26 = csrs("mhpmcounter26") + val mhpmcounter27 = csrs("mhpmcounter27") + val mhpmcounter28 = csrs("mhpmcounter28") + val mhpmcounter29 = csrs("mhpmcounter29") + val mhpmcounter30 = csrs("mhpmcounter30") + val mhpmcounter31 = csrs("mhpmcounter31") + val mhpmevent3 = csrs("mhpmevent3") + val mhpmevent4 = csrs("mhpmevent4") + val mhpmevent5 = csrs("mhpmevent5") + val mhpmevent6 = csrs("mhpmevent6") + val mhpmevent7 = csrs("mhpmevent7") + val mhpmevent8 = csrs("mhpmevent8") + val mhpmevent9 = csrs("mhpmevent9") + val mhpmevent10 = csrs("mhpmevent10") + val mhpmevent11 = csrs("mhpmevent11") + val mhpmevent12 = csrs("mhpmevent12") + val mhpmevent13 = csrs("mhpmevent13") + val mhpmevent14 = csrs("mhpmevent14") + val mhpmevent15 = csrs("mhpmevent15") + val mhpmevent16 = csrs("mhpmevent16") + val mhpmevent17 = csrs("mhpmevent17") + val mhpmevent18 = csrs("mhpmevent18") + val mhpmevent19 = csrs("mhpmevent19") + val mhpmevent20 = csrs("mhpmevent20") + val mhpmevent21 = csrs("mhpmevent21") + val mhpmevent22 = csrs("mhpmevent22") + val mhpmevent23 = csrs("mhpmevent23") + val mhpmevent24 = csrs("mhpmevent24") + val mhpmevent25 = csrs("mhpmevent25") + val mhpmevent26 = csrs("mhpmevent26") + val mhpmevent27 = csrs("mhpmevent27") + val mhpmevent28 = csrs("mhpmevent28") + val mhpmevent29 = csrs("mhpmevent29") + val mhpmevent30 = csrs("mhpmevent30") + val mhpmevent31 = csrs("mhpmevent31") + val mvendorid = csrs("mvendorid") + val marchid = csrs("marchid") + val mimpid = csrs("mimpid") + val mhartid = csrs("mhartid") + val mconfigptr = csrs("mconfigptr") + val mtopi = csrs("mtopi") + + val sieh = csrs32("sieh") + val siph = csrs32("siph") + val stimecmph = csrs32("stimecmph") + val vsieh = csrs32("vsieh") + val vsiph = csrs32("vsiph") + val vstimecmph = csrs32("vstimecmph") + val htimedeltah = csrs32("htimedeltah") + val hidelegh = csrs32("hidelegh") + val hvienh = csrs32("hvienh") + val henvcfgh = csrs32("henvcfgh") + val hviph = csrs32("hviph") + val hviprio1h = csrs32("hviprio1h") + val hviprio2h = csrs32("hviprio2h") + val hstateen0h = csrs32("hstateen0h") + val hstateen1h = csrs32("hstateen1h") + val hstateen2h = csrs32("hstateen2h") + val hstateen3h = csrs32("hstateen3h") + val cycleh = csrs32("cycleh") + val timeh = csrs32("timeh") + val instreth = csrs32("instreth") + val hpmcounter3h = csrs32("hpmcounter3h") + val hpmcounter4h = csrs32("hpmcounter4h") + val hpmcounter5h = csrs32("hpmcounter5h") + val hpmcounter6h = csrs32("hpmcounter6h") + val hpmcounter7h = csrs32("hpmcounter7h") + val hpmcounter8h = csrs32("hpmcounter8h") + val hpmcounter9h = csrs32("hpmcounter9h") + val hpmcounter10h = csrs32("hpmcounter10h") + val hpmcounter11h = 
csrs32("hpmcounter11h") + val hpmcounter12h = csrs32("hpmcounter12h") + val hpmcounter13h = csrs32("hpmcounter13h") + val hpmcounter14h = csrs32("hpmcounter14h") + val hpmcounter15h = csrs32("hpmcounter15h") + val hpmcounter16h = csrs32("hpmcounter16h") + val hpmcounter17h = csrs32("hpmcounter17h") + val hpmcounter18h = csrs32("hpmcounter18h") + val hpmcounter19h = csrs32("hpmcounter19h") + val hpmcounter20h = csrs32("hpmcounter20h") + val hpmcounter21h = csrs32("hpmcounter21h") + val hpmcounter22h = csrs32("hpmcounter22h") + val hpmcounter23h = csrs32("hpmcounter23h") + val hpmcounter24h = csrs32("hpmcounter24h") + val hpmcounter25h = csrs32("hpmcounter25h") + val hpmcounter26h = csrs32("hpmcounter26h") + val hpmcounter27h = csrs32("hpmcounter27h") + val hpmcounter28h = csrs32("hpmcounter28h") + val hpmcounter29h = csrs32("hpmcounter29h") + val hpmcounter30h = csrs32("hpmcounter30h") + val hpmcounter31h = csrs32("hpmcounter31h") + val mstatush = csrs32("mstatush") + val midelegh = csrs32("midelegh") + val mieh = csrs32("mieh") + val mvienh = csrs32("mvienh") + val mviph = csrs32("mviph") + val menvcfgh = csrs32("menvcfgh") + val mstateen0h = csrs32("mstateen0h") + val mstateen1h = csrs32("mstateen1h") + val mstateen2h = csrs32("mstateen2h") + val mstateen3h = csrs32("mstateen3h") + val miph = csrs32("miph") + val mhpmevent3h = csrs32("mhpmevent3h") + val mhpmevent4h = csrs32("mhpmevent4h") + val mhpmevent5h = csrs32("mhpmevent5h") + val mhpmevent6h = csrs32("mhpmevent6h") + val mhpmevent7h = csrs32("mhpmevent7h") + val mhpmevent8h = csrs32("mhpmevent8h") + val mhpmevent9h = csrs32("mhpmevent9h") + val mhpmevent10h = csrs32("mhpmevent10h") + val mhpmevent11h = csrs32("mhpmevent11h") + val mhpmevent12h = csrs32("mhpmevent12h") + val mhpmevent13h = csrs32("mhpmevent13h") + val mhpmevent14h = csrs32("mhpmevent14h") + val mhpmevent15h = csrs32("mhpmevent15h") + val mhpmevent16h = csrs32("mhpmevent16h") + val mhpmevent17h = csrs32("mhpmevent17h") + val mhpmevent18h = csrs32("mhpmevent18h") + val mhpmevent19h = csrs32("mhpmevent19h") + val mhpmevent20h = csrs32("mhpmevent20h") + val mhpmevent21h = csrs32("mhpmevent21h") + val mhpmevent22h = csrs32("mhpmevent22h") + val mhpmevent23h = csrs32("mhpmevent23h") + val mhpmevent24h = csrs32("mhpmevent24h") + val mhpmevent25h = csrs32("mhpmevent25h") + val mhpmevent26h = csrs32("mhpmevent26h") + val mhpmevent27h = csrs32("mhpmevent27h") + val mhpmevent28h = csrs32("mhpmevent28h") + val mhpmevent29h = csrs32("mhpmevent29h") + val mhpmevent30h = csrs32("mhpmevent30h") + val mhpmevent31h = csrs32("mhpmevent31h") + val mnscratch = csrs32("mnscratch") + val mnepc = csrs32("mnepc") + val mncause = csrs32("mncause") + val mnstatus = csrs32("mnstatus") + val mseccfgh = csrs32("mseccfgh") + val mcycleh = csrs32("mcycleh") + val minstreth = csrs32("minstreth") + val mhpmcounter3h = csrs32("mhpmcounter3h") + val mhpmcounter4h = csrs32("mhpmcounter4h") + val mhpmcounter5h = csrs32("mhpmcounter5h") + val mhpmcounter6h = csrs32("mhpmcounter6h") + val mhpmcounter7h = csrs32("mhpmcounter7h") + val mhpmcounter8h = csrs32("mhpmcounter8h") + val mhpmcounter9h = csrs32("mhpmcounter9h") + val mhpmcounter10h = csrs32("mhpmcounter10h") + val mhpmcounter11h = csrs32("mhpmcounter11h") + val mhpmcounter12h = csrs32("mhpmcounter12h") + val mhpmcounter13h = csrs32("mhpmcounter13h") + val mhpmcounter14h = csrs32("mhpmcounter14h") + val mhpmcounter15h = csrs32("mhpmcounter15h") + val mhpmcounter16h = csrs32("mhpmcounter16h") + val mhpmcounter17h = csrs32("mhpmcounter17h") + val 
mhpmcounter18h = csrs32("mhpmcounter18h") + val mhpmcounter19h = csrs32("mhpmcounter19h") + val mhpmcounter20h = csrs32("mhpmcounter20h") + val mhpmcounter21h = csrs32("mhpmcounter21h") + val mhpmcounter22h = csrs32("mhpmcounter22h") + val mhpmcounter23h = csrs32("mhpmcounter23h") + val mhpmcounter24h = csrs32("mhpmcounter24h") + val mhpmcounter25h = csrs32("mhpmcounter25h") + val mhpmcounter26h = csrs32("mhpmcounter26h") + val mhpmcounter27h = csrs32("mhpmcounter27h") + val mhpmcounter28h = csrs32("mhpmcounter28h") + val mhpmcounter29h = csrs32("mhpmcounter29h") + val mhpmcounter30h = csrs32("mhpmcounter30h") + val mhpmcounter31h = csrs32("mhpmcounter31h") + + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += fflags + res += frm + res += fcsr + res += vstart + res += vxsat + res += vxrm + res += vcsr + res += seed + res += jvt + res += cycle + res += time + res += instret + res += hpmcounter3 + res += hpmcounter4 + res += hpmcounter5 + res += hpmcounter6 + res += hpmcounter7 + res += hpmcounter8 + res += hpmcounter9 + res += hpmcounter10 + res += hpmcounter11 + res += hpmcounter12 + res += hpmcounter13 + res += hpmcounter14 + res += hpmcounter15 + res += hpmcounter16 + res += hpmcounter17 + res += hpmcounter18 + res += hpmcounter19 + res += hpmcounter20 + res += hpmcounter21 + res += hpmcounter22 + res += hpmcounter23 + res += hpmcounter24 + res += hpmcounter25 + res += hpmcounter26 + res += hpmcounter27 + res += hpmcounter28 + res += hpmcounter29 + res += hpmcounter30 + res += hpmcounter31 + res += vl + res += vtype + res += vlenb + res += sstatus + res += sedeleg + res += sideleg + res += sie + res += stvec + res += scounteren + res += senvcfg + res += sstateen0 + res += sstateen1 + res += sstateen2 + res += sstateen3 + res += sscratch + res += sepc + res += scause + res += stval + res += sip + res += stimecmp + res += siselect + res += sireg + res += stopei + res += satp + res += scontext + res += vsstatus + res += vsie + res += vstvec + res += vsscratch + res += vsepc + res += vscause + res += vstval + res += vsip + res += vstimecmp + res += vsiselect + res += vsireg + res += vstopei + res += vsatp + res += hstatus + res += hedeleg + res += hideleg + res += hie + res += htimedelta + res += hcounteren + res += hgeie + res += hvien + res += hvictl + res += henvcfg + res += hstateen0 + res += hstateen1 + res += hstateen2 + res += hstateen3 + res += htval + res += hip + res += hvip + res += hviprio1 + res += hviprio2 + res += htinst + res += hgatp + res += hcontext + res += hgeip + res += vstopi + res += scountovf + res += stopi + res += utvt + res += unxti + res += uintstatus + res += uscratchcsw + res += uscratchcswl + res += stvt + res += snxti + res += sintstatus + res += sscratchcsw + res += sscratchcswl + res += mtvt + res += mnxti + res += mintstatus + res += mscratchcsw + res += mscratchcswl + res += mstatus + res += misa + res += medeleg + res += mideleg + res += mie + res += mtvec + res += mcounteren + res += mvien + res += mvip + res += menvcfg + res += mstateen0 + res += mstateen1 + res += mstateen2 + res += mstateen3 + res += mcountinhibit + res += mscratch + res += mepc + res += mcause + res += mtval + res += mip + res += mtinst + res += mtval2 + res += miselect + res += mireg + res += mtopei + res += pmpcfg0 + res += pmpcfg1 + res += pmpcfg2 + res += pmpcfg3 + res += pmpcfg4 + res += pmpcfg5 + res += pmpcfg6 + res += pmpcfg7 + res += pmpcfg8 + res += pmpcfg9 + res += pmpcfg10 + res += pmpcfg11 + res += pmpcfg12 + res += pmpcfg13 + res += pmpcfg14 + res 
+= pmpcfg15 + res += pmpaddr0 + res += pmpaddr1 + res += pmpaddr2 + res += pmpaddr3 + res += pmpaddr4 + res += pmpaddr5 + res += pmpaddr6 + res += pmpaddr7 + res += pmpaddr8 + res += pmpaddr9 + res += pmpaddr10 + res += pmpaddr11 + res += pmpaddr12 + res += pmpaddr13 + res += pmpaddr14 + res += pmpaddr15 + res += pmpaddr16 + res += pmpaddr17 + res += pmpaddr18 + res += pmpaddr19 + res += pmpaddr20 + res += pmpaddr21 + res += pmpaddr22 + res += pmpaddr23 + res += pmpaddr24 + res += pmpaddr25 + res += pmpaddr26 + res += pmpaddr27 + res += pmpaddr28 + res += pmpaddr29 + res += pmpaddr30 + res += pmpaddr31 + res += pmpaddr32 + res += pmpaddr33 + res += pmpaddr34 + res += pmpaddr35 + res += pmpaddr36 + res += pmpaddr37 + res += pmpaddr38 + res += pmpaddr39 + res += pmpaddr40 + res += pmpaddr41 + res += pmpaddr42 + res += pmpaddr43 + res += pmpaddr44 + res += pmpaddr45 + res += pmpaddr46 + res += pmpaddr47 + res += pmpaddr48 + res += pmpaddr49 + res += pmpaddr50 + res += pmpaddr51 + res += pmpaddr52 + res += pmpaddr53 + res += pmpaddr54 + res += pmpaddr55 + res += pmpaddr56 + res += pmpaddr57 + res += pmpaddr58 + res += pmpaddr59 + res += pmpaddr60 + res += pmpaddr61 + res += pmpaddr62 + res += pmpaddr63 + res += mseccfg + res += tselect + res += tdata1 + res += tdata2 + res += tdata3 + res += tinfo + res += tcontrol + res += mcontext + res += mscontext + res += dcsr + res += dpc + res += dscratch0 + res += dscratch1 + res += mcycle + res += minstret + res += mhpmcounter3 + res += mhpmcounter4 + res += mhpmcounter5 + res += mhpmcounter6 + res += mhpmcounter7 + res += mhpmcounter8 + res += mhpmcounter9 + res += mhpmcounter10 + res += mhpmcounter11 + res += mhpmcounter12 + res += mhpmcounter13 + res += mhpmcounter14 + res += mhpmcounter15 + res += mhpmcounter16 + res += mhpmcounter17 + res += mhpmcounter18 + res += mhpmcounter19 + res += mhpmcounter20 + res += mhpmcounter21 + res += mhpmcounter22 + res += mhpmcounter23 + res += mhpmcounter24 + res += mhpmcounter25 + res += mhpmcounter26 + res += mhpmcounter27 + res += mhpmcounter28 + res += mhpmcounter29 + res += mhpmcounter30 + res += mhpmcounter31 + res += mhpmevent3 + res += mhpmevent4 + res += mhpmevent5 + res += mhpmevent6 + res += mhpmevent7 + res += mhpmevent8 + res += mhpmevent9 + res += mhpmevent10 + res += mhpmevent11 + res += mhpmevent12 + res += mhpmevent13 + res += mhpmevent14 + res += mhpmevent15 + res += mhpmevent16 + res += mhpmevent17 + res += mhpmevent18 + res += mhpmevent19 + res += mhpmevent20 + res += mhpmevent21 + res += mhpmevent22 + res += mhpmevent23 + res += mhpmevent24 + res += mhpmevent25 + res += mhpmevent26 + res += mhpmevent27 + res += mhpmevent28 + res += mhpmevent29 + res += mhpmevent30 + res += mhpmevent31 + res += mvendorid + res += marchid + res += mimpid + res += mhartid + res += mconfigptr + res += mtopi + res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all: _*) + res += sieh + res += siph + res += stimecmph + res += vsieh + res += vsiph + res += vstimecmph + res += htimedeltah + res += hidelegh + res += hvienh + res += henvcfgh + res += hviph + res += hviprio1h + res += hviprio2h + res += hstateen0h + res += hstateen1h + res += hstateen2h + res += hstateen3h + res += cycleh + res += timeh + res += instreth + res += hpmcounter3h + res += hpmcounter4h + res += hpmcounter5h + res += hpmcounter6h + res += hpmcounter7h + res += hpmcounter8h + res += hpmcounter9h + res += hpmcounter10h + res += hpmcounter11h + res += hpmcounter12h + res += hpmcounter13h + res += hpmcounter14h + res += 
hpmcounter15h + res += hpmcounter16h + res += hpmcounter17h + res += hpmcounter18h + res += hpmcounter19h + res += hpmcounter20h + res += hpmcounter21h + res += hpmcounter22h + res += hpmcounter23h + res += hpmcounter24h + res += hpmcounter25h + res += hpmcounter26h + res += hpmcounter27h + res += hpmcounter28h + res += hpmcounter29h + res += hpmcounter30h + res += hpmcounter31h + res += mstatush + res += midelegh + res += mieh + res += mvienh + res += mviph + res += menvcfgh + res += mstateen0h + res += mstateen1h + res += mstateen2h + res += mstateen3h + res += miph + res += mhpmevent3h + res += mhpmevent4h + res += mhpmevent5h + res += mhpmevent6h + res += mhpmevent7h + res += mhpmevent8h + res += mhpmevent9h + res += mhpmevent10h + res += mhpmevent11h + res += mhpmevent12h + res += mhpmevent13h + res += mhpmevent14h + res += mhpmevent15h + res += mhpmevent16h + res += mhpmevent17h + res += mhpmevent18h + res += mhpmevent19h + res += mhpmevent20h + res += mhpmevent21h + res += mhpmevent22h + res += mhpmevent23h + res += mhpmevent24h + res += mhpmevent25h + res += mhpmevent26h + res += mhpmevent27h + res += mhpmevent28h + res += mhpmevent29h + res += mhpmevent30h + res += mhpmevent31h + res += mnscratch + res += mnepc + res += mncause + res += mnstatus + res += mseccfgh + res += mcycleh + res += minstreth + res += mhpmcounter3h + res += mhpmcounter4h + res += mhpmcounter5h + res += mhpmcounter6h + res += mhpmcounter7h + res += mhpmcounter8h + res += mhpmcounter9h + res += mhpmcounter10h + res += mhpmcounter11h + res += mhpmcounter12h + res += mhpmcounter13h + res += mhpmcounter14h + res += mhpmcounter15h + res += mhpmcounter16h + res += mhpmcounter17h + res += mhpmcounter18h + res += mhpmcounter19h + res += mhpmcounter20h + res += mhpmcounter21h + res += mhpmcounter22h + res += mhpmcounter23h + res += mhpmcounter24h + res += mhpmcounter25h + res += mhpmcounter26h + res += mhpmcounter27h + res += mhpmcounter28h + res += mhpmcounter29h + res += mhpmcounter30h + res += mhpmcounter31h + res.toArray + } + } + + object CustomCSRs { + val mnscratch = 0x350 + val mnepc = 0x351 + val mncause = 0x352 + val mnstatus = 0x353 + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += mnscratch + res += mnepc + res += mncause + res += mnstatus + res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) + res.toArray + } + } +} diff --git a/rocketv/src/Replacement.scala b/rocketv/src/Replacement.scala new file mode 100644 index 000000000..fabb2f331 --- /dev/null +++ b/rocketv/src/Replacement.scala @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu + +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util._ +import chisel3.util.random.LFSR + +abstract class ReplacementPolicy { + def nBits: Int + def perSet: Boolean + def way: UInt + def miss: Unit + def hit: Unit + def access(touch_way: UInt): Unit + def access(touch_ways: Seq[Valid[UInt]]): Unit + def state_read: UInt + def get_next_state(state: UInt, touch_way: UInt): UInt + def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = { + touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev)) + } + def get_replace_way(state: UInt): UInt +} + +object Random +{ + def apply(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) 
random(log2Ceil(mod)-1,0) + else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod)) + } + def apply(mod: Int): UInt = apply(mod, randomizer) + def oneHot(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0)) + else VecInit(PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod))).asUInt + } + def oneHot(mod: Int): UInt = oneHot(mod, randomizer) + + private def randomizer = LFSR(16) + private def partition(value: UInt, slices: Int) = + Seq.tabulate(slices)(i => value < (((i + 1) << value.getWidth) / slices).U) +} + +object ReplacementPolicy { + def fromString(s: String, n_ways: Int): ReplacementPolicy = s.toLowerCase match { + case "random" => new RandomReplacement(n_ways) + case "lru" => new TrueLRU(n_ways) + case "plru" => new PseudoLRU(n_ways) + case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t") + } +} + +class RandomReplacement(n_ways: Int) extends ReplacementPolicy { + private val replace = Wire(Bool()) + replace := false.B + def nBits = 16 + def perSet = false + private val lfsr = LFSR(nBits, replace) + def state_read = WireDefault(lfsr) + + def way = Random(n_ways, lfsr) + def miss = replace := true.B + def hit = {} + def access(touch_way: UInt) = {} + def access(touch_ways: Seq[Valid[UInt]]) = {} + def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare + def get_replace_way(state: UInt) = way +} + +abstract class SeqReplacementPolicy { + def access(set: UInt): Unit + def update(valid: Bool, hit: Bool, set: UInt, way: UInt): Unit + def way: UInt +} + +abstract class SetAssocReplacementPolicy { + def access(set: UInt, touch_way: UInt): Unit + def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit + def way(set: UInt): UInt +} + +class SeqRandom(n_ways: Int) extends SeqReplacementPolicy { + val logic = new RandomReplacement(n_ways) + def access(set: UInt) = { } + def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = { + when (valid && !hit) { logic.miss } + } + def way = logic.way +} + +class TrueLRU(n_ways: Int) extends ReplacementPolicy { + // True LRU replacement policy, using a triangular matrix to track which ways are more recently used than others. + // The matrix is packed into a single UInt (or Bits).
Example 4-way (6-bits): + // [5] - 3 more recent than 2 + // [4] - 3 more recent than 1 + // [3] - 2 more recent than 1 + // [2] - 3 more recent than 0 + // [1] - 2 more recent than 0 + // [0] - 1 more recent than 0 + def nBits = (n_ways * (n_ways-1)) / 2 + def perSet = true + private val state_reg = RegInit(0.U(nBits.W)) + def state_read = WireDefault(state_reg) + + private def extractMRUVec(state: UInt): Seq[UInt] = { + // Extract per-way information about which higher-indexed ways are more recently used + val moreRecentVec = Wire(Vec(n_ways-1, UInt(n_ways.W))) + var lsb = 0 + for (i <- 0 until n_ways-1) { + moreRecentVec(i) := Cat(state(lsb+n_ways-i-2,lsb), 0.U((i+1).W)) + lsb = lsb + (n_ways - i - 1) + } + moreRecentVec + } + + def get_next_state(state: UInt, touch_way: UInt): UInt = { + val nextState = Wire(Vec(n_ways-1, UInt(n_ways.W))) + val moreRecentVec = extractMRUVec(state) // reconstruct lower triangular matrix + val wayDec = UIntToOH(touch_way, n_ways) + + // Compute next value of triangular matrix + // set the touched way as more recent than every other way + nextState.zipWithIndex.foreach { case (e, i) => + e := Mux(i.U === touch_way, 0.U(n_ways.W), moreRecentVec(i) | wayDec) + } + + nextState.zipWithIndex.tail.foldLeft((nextState.head.apply(n_ways-1,1),0)) { case ((pe,pi),(ce,ci)) => (Cat(ce.apply(n_ways-1,ci+1), pe), ci) }._1 + } + + def access(touch_way: UInt): Unit = { + state_reg := get_next_state(state_reg, touch_way) + } + def access(touch_ways: Seq[Valid[UInt]]): Unit = { + when (VecInit(touch_ways.map(_.valid)).asUInt.orR) { + state_reg := get_next_state(state_reg, touch_ways) + } + // for (i <- 1 until touch_ways.size) { + // cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous") + // } + } + + def get_replace_way(state: UInt): UInt = { + val moreRecentVec = extractMRUVec(state) // reconstruct lower triangular matrix + // For each way, determine if all other ways are more recent + val mruWayDec = (0 until n_ways).map { i => + val upperMoreRecent = (if (i == n_ways-1) true.B else moreRecentVec(i).apply(n_ways-1,i+1).andR) + val lowerMoreRecent = (if (i == 0) true.B else moreRecentVec.map(e => !e(i)).reduce(_ && _)) + upperMoreRecent && lowerMoreRecent + } + OHToUInt(mruWayDec) + } + + def way = get_replace_way(state_reg) + def miss = access(way) + def hit = {} + @deprecated("replace 'replace' with 'way' from abstract class ReplacementPolicy","Rocket Chip 2020.05") + def replace: UInt = way +} + +class PseudoLRU(n_ways: Int) extends ReplacementPolicy { + // Pseudo-LRU tree algorithm: https://en.wikipedia.org/wiki/Pseudo-LRU#Tree-PLRU + // + // + // - bits storage example for 4-way PLRU binary tree: + // bit[2]: ways 3+2 older than ways 1+0 + // / \ + // bit[1]: way 3 older than way 2 bit[0]: way 1 older than way 0 + // + // + // - bits storage example for 3-way PLRU binary tree: + // bit[1]: way 2 older than ways 1+0 + // \ + // bit[0]: way 1 older than way 0 + // + // + // - bits storage example for 8-way PLRU binary tree: + // bit[6]: ways 7-4 older than ways 3-0 + // / \ + // bit[5]: ways 7+6 > 5+4 bit[2]: ways 3+2 > 1+0 + // / \ / \ + // bit[4]: way 7>6 bit[3]: way 5>4 bit[1]: way 3>2 bit[0]: way 1>0 + + def nBits = n_ways - 1 + def perSet = true + private val state_reg = if (nBits == 0) Reg(UInt(0.W)) else RegInit(0.U(nBits.W)) + def state_read = WireDefault(state_reg) + + def access(touch_way: UInt): Unit = { + state_reg := get_next_state(state_reg, touch_way) + } + def access(touch_ways: Seq[Valid[UInt]]): Unit = 
{ + when (VecInit(touch_ways.map(_.valid)).asUInt.orR) { + state_reg := get_next_state(state_reg, touch_ways) + } + // for (i <- 1 until touch_ways.size) { + // cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous") + // } + } + + + /** @param state state_reg bits for this sub-tree + * @param touch_way touched way encoded value bits for this sub-tree + * @param tree_nways number of ways in this sub-tree + */ + def get_next_state(state: UInt, touch_way: UInt, tree_nways: Int): UInt = { + require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways") + require(touch_way.getWidth == (log2Ceil(tree_nways) max 1), s"wrong encoded way width ${touch_way.getWidth} for $tree_nways ways") + + if (tree_nways > 2) { + // we are at a branching node in the tree, so recurse + val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree + val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree + val set_left_older = !touch_way(log2Ceil(tree_nways)-1) + val left_subtree_state = if(tree_nways - 1 == right_nways) 0.U else state(tree_nways-3, right_nways-1) + val right_subtree_state = state(right_nways-2, 0) + + if (left_nways > 1) { + // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees + Cat(set_left_older, + Mux(set_left_older, + left_subtree_state, // if setting left sub-tree as older, do NOT recurse into left sub-tree + get_next_state(left_subtree_state, touch_way(log2Ceil(left_nways)-1,0), left_nways)), // recurse left if newer + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree + } else { + // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree + Cat(set_left_older, + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree + } + } else if (tree_nways == 2) { + // we are at a leaf node at the end of the tree, so set the single state bit opposite of the lsb of the touched way encoded value + !touch_way(0) + } else { // tree_nways <= 1 + // we are at an empty node in an empty tree for 1 way, so return single zero bit for Chisel (no zero-width wires) + 0.U(1.W) + } + } + + def get_next_state(state: UInt, touch_way: UInt): UInt = { + def padTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(0.U((n - x.getWidth).W), x) + } + + val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) padTo(touch_way, log2Ceil(n_ways)) else touch_way(log2Ceil(n_ways)-1,0) + get_next_state(state, touch_way_sized, n_ways) + } + + + /** @param state state_reg bits for this sub-tree + * @param tree_nways number of ways in this sub-tree + */ + def get_replace_way(state: UInt, tree_nways: Int): UInt = { + require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways") + + // this algorithm recursively descends the binary tree, filling in the way-to-replace encoded value from msb to lsb + if (tree_nways > 2) { + // we are at a branching node in the tree, so recurse + val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of 
ways in the right sub-tree + val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree + val left_subtree_older = state(tree_nways-2) + val left_subtree_state = if(tree_nways - 1 == right_nways) 0.U else state(tree_nways-3, right_nways-1) + val right_subtree_state = state(right_nways-2, 0) + + if (left_nways > 1) { + // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees + Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value + Mux(left_subtree_older, // if left sub-tree is older, recurse left, else recurse right + get_replace_way(left_subtree_state, left_nways), // recurse left + get_replace_way(right_subtree_state, right_nways))) // recurse right + } else { + // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree + Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value + Mux(left_subtree_older, // if left sub-tree is older, return and do not recurse right + 0.U(1.W), + get_replace_way(right_subtree_state, right_nways))) // recurse right + } + } else if (tree_nways == 2) { + // we are at a leaf node at the end of the tree, so just return the single state bit as lsb of the way-to-replace encoded value + state(0) + } else { // tree_nways <= 1 + // we are at an empty node in an unbalanced tree for non-power-of-2 ways, so return single zero bit as lsb of the way-to-replace encoded value + 0.U(1.W) + } + } + + def get_replace_way(state: UInt): UInt = get_replace_way(state, n_ways) + + def way = get_replace_way(state_reg) + def miss = access(way) + def hit = {} +} + +class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy { + val logic = new PseudoLRU(n_ways) + val state = SyncReadMem(n_sets, UInt(logic.nBits.W)) + val current_state = Wire(UInt(logic.nBits.W)) + val next_state = Wire(UInt(logic.nBits.W)) + val plru_way = logic.get_replace_way(current_state) + + def access(set: UInt) = { + current_state := state.read(set) + } + + def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = { + val update_way = Mux(hit, way, plru_way) + next_state := logic.get_next_state(current_state, update_way) + when (valid) { state.write(set, next_state) } + } + + def way = plru_way +} + +class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy { + val logic = policy.toLowerCase match { + case "plru" => new PseudoLRU(n_ways) + case "lru" => new TrueLRU(n_ways) + case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t") + } + val state_vec = + if (logic.nBits == 0) Reg(Vec(n_sets, UInt(logic.nBits.W))) // Work around elaboration error on following line + else RegInit(VecInit(Seq.fill(n_sets)(0.U(logic.nBits.W)))) + + def access(set: UInt, touch_way: UInt) = { + state_vec(set) := logic.get_next_state(state_vec(set), touch_way) + } + + def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = { + require(sets.size == touch_ways.size, "internal consistency check: should be same number of simultaneous updates for sets and touch_ways") + for (set <- 0 until n_sets) { + val set_touch_ways = (sets zip touch_ways).map { case (touch_set, touch_way) => + Pipe(touch_way.valid && (touch_set === set.U), touch_way.bits, 0)} + when (VecInit(set_touch_ways.map(_.valid)).asUInt.orR) { + state_vec(set) := logic.get_next_state(state_vec(set), set_touch_ways) + } + } + } + + def way(set: UInt) = 
logic.get_replace_way(state_vec(set)) +} diff --git a/rocketv/src/RocketCore.scala b/rocketv/src/RocketCore.scala new file mode 100644 index 000000000..911df4dbf --- /dev/null +++ b/rocketv/src/RocketCore.scala @@ -0,0 +1,1539 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.probe.{Probe, ProbeValue, define} +import chisel3.util.circt.ClockGate +import chisel3.util.experimental.decode.DecodeBundle +import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up} +import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes +import org.chipsalliance.rvdecoderdb.Instruction + +class RocketProbe(param: RocketParameter) extends Bundle { + // reg file + val rfWen = Bool() + val rfWaddr = UInt(param.lgNXRegs.W) + val rfWdata = UInt(param.xLen.W) +} + +object RocketParameter { + implicit def rwP: upickle.default.ReadWriter[RocketParameter] = upickle.default.macroRW[RocketParameter] +} + +case class RocketParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + instructionSets: Set[String], + vLen: Int, + usingUser: Boolean, + hartIdLen: Int, + nPMPs: Int, + asidBits: Int, + nBreakpoints: Int, + usingBTB: Boolean, + useBPWatch: Boolean, + mcontextWidth: Int, + scontextWidth: Int, + mulDivLantency: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + hasBeu: Boolean, + fastLoadByte: Boolean, + fastLoadWord: Boolean, + dcacheNSets: Int, + flushOnFenceI: Boolean, + usingT1: Boolean + ) + extends SerializableModuleParameter { + // interface to T1 + def usingVector = hasInstructionSet("rv_v") + + // fixed for now + def usingRVE = false + def usingDataScratchpad: Boolean = false + def hasDataECC: Boolean = false + def vmidBits = 0 + def nPerfCounters = 0 + + // calculated + def lgNXRegs = if (usingRVE) 4 else 5 + + def pipelinedMul: Boolean = usingMulDiv && mulUnroll == xLen + + def instructions: Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // Four mandatory instruction sets. 
+ Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) + + def coreInstBytes = (if (usingCompressed) 16 else 32) / 8 + + private def hasInstructionSet(setName: String): Boolean = + instructions.flatMap(_.instructionSets.map(_.name)).contains(setName) + + private def hasInstruction(instName: String): Boolean = instructions.map(_.name).contains(instName) + + def xLen: Int = + (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match { + case (true, true) => throw new Exception("cannot support both rv32 and rv64 together") + case (true, false) => 32 + case (false, true) => 64 + case (false, false) => throw new Exception("no basic instruction found.") + } + + def fLen: Option[Int] = + ( + hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"), + hasInstructionSet("rv_d") || hasInstructionSet("rv64_d") + ) match { + case (false, false) => None + case (true, false) => Some(32) + case (false, true) => Some(64) + case (true, true) => Some(64) + } + + def minFLen: Option[Int] = + if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh")) + Some(16) + else + fLen + + def usingMulDiv = hasInstructionSet("rv_m") || hasInstructionSet("rv64_m") + + def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a") + + def usingVM = hasInstructionSet("sfence.vma") + + def usingSupervisor = hasInstruction("sret") + + // static to false for now + def usingHypervisor = hasInstructionSet("rv_h") || hasInstructionSet("rv64_h") + + def usingDebug = hasInstructionSet("rv_sdext") + + def usingCompressed = hasInstructionSet("rv_c") + + def usingFPU = fLen.isDefined + + // static to false for now + def haveCease = hasInstruction("cease") + + // static to false for now + def usingNMI = hasInstructionSet("rv_smrnmi") + + // calculated parameter + def fetchWidth: Int = if (usingCompressed) 2 else 1 + + def resetVectorLen: Int = { + val externalLen = paddrBits + require(externalLen <= xLen, s"External reset vector length ($externalLen) must be <= XLEN ($xLen)") + require(externalLen <= vaddrBitsExtended, s"External reset vector length ($externalLen) must be <= virtual address bit width ($vaddrBitsExtended)") + externalLen + } + + val nLocalInterrupts: Int = 0 + + def pgIdxBits: Int = 12 + def pgLevels: Int = if (xLen == 64) 3 /* Sv39 */ else 2 /* Sv32 */ + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def maxHypervisorExtraAddrBits: Int = 2 + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1) min xLen + } + def vpnBits: Int = vaddrBits - pgIdxBits + def ppnBits: Int = paddrBits - pgIdxBits + def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) (if (usingHypervisor) 1 else 0) + 1 else 0) + + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + // btb entries + def btbEntries: Int = 28 + def 
bhtHistoryLength: Option[Int] = Some(8) + def bhtCounterLength: Option[Int] = Some(1) + def coreInstBits: Int = if (usingCompressed) 16 else 32 + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + def lgCacheBlockBytes: Int = log2Ceil(cacheBlockBytes) + def blockOffBits = lgCacheBlockBytes + // todo: 64 -> dcacheParam.nSets + def idxBits: Int = log2Ceil(dcacheNSets) + // dCache untag bits + def untagBits: Int = blockOffBits + idxBits + def dcacheReqTagBits: Int = 6 + def dcacheArbPorts: Int = 1 + (if(usingVM) 1 else 0) + (if(usingDataScratchpad) 1 else 0) + def coreDataBits: Int = xLen max fLen.getOrElse(0) + def coreDataBytes: Int = coreDataBits / 8 + def separateUncachedResp: Boolean = false + def minPgLevels: Int = { + val res = xLen match { + case 32 => 2 + case 64 => 3 + } + require(pgLevels >= res) + res + } + + def maxPAddrBits: Int = { + require(xLen == 32 || xLen == 64, s"Only XLENs of 32 or 64 are supported, but got $xLen") + xLen match { case 32 => 34; case 64 => 56 } + } + + val csrParameter: CSRParameter = CSRParameter( + useAsyncReset: Boolean, + vLen: Int, + xLen: Int, + fLen.getOrElse(0): Int, + hartIdLen: Int, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + vmidBits: Int, + nPMPs: Int, + nPerfCounters: Int, + paddrBits: Int, + nBreakpoints: Int, + usingSupervisor: Boolean, + usingFPU: Boolean, + usingUser: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingAtomics: Boolean, + usingDebug: Boolean, + usingMulDiv: Boolean, + usingVector: Boolean + ) + val decoderParameter = DecoderParameter( + instructionSets, + pipelinedMul, + flushOnFenceI + ) + val iBufParameter: IBufParameter = IBufParameter( + useAsyncReset, + xLen, + usingCompressed, + vaddrBits, + btbEntries, + vaddrBitsExtended, + bhtHistoryLength, + bhtCounterLength, + fetchWidth + ) + val breakpointUnitParameter: BreakpointUnitParameter = BreakpointUnitParameter( + nBreakpoints, + xLen, + useBPWatch, + vaddrBits, + mcontextWidth, + scontextWidth + ) + val aluParameter: ALUParameter = ALUParameter(xLen) + val mulDivParameter: MulDivParameter = MulDivParameter( + useAsyncReset: Boolean, + mulDivLantency: Int, + xLen: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean) + val mulParameter: Option[PipelinedMultiplierParameter] = Option.when(usingMulDiv && mulUnroll == xLen)(PipelinedMultiplierParameter( + useAsyncReset: Boolean, + 2, + xLen: Int + )) +} + +/** The Interface of [[Rocket]].
+ * The [[Rocket]] is the public module, and this bundle is its only public interface. + */ +class RocketInterface(parameter: RocketParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val hartid = Flipped(UInt(parameter.hartIdLen.W)) + val interrupts = Flipped(new TileInterrupts(parameter.usingSupervisor, parameter.nLocalInterrupts, parameter.usingNMI, parameter.resetVectorLen)) + val buserror = Input(Bool()) + val imem = new FrontendIO( + parameter.vaddrBitsExtended, + parameter.vaddrBits, + parameter.asidBits, + parameter.btbEntries, + parameter.bhtHistoryLength, + parameter.bhtCounterLength, + parameter.coreInstBits, + parameter.fetchWidth + ) + + val dmem = new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ) + + val ptw = Flipped( + new DatapathPTWIO( + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits: Int, + parameter.vaddrBits: Int, + parameter.asidBits: Int, + parameter.nPMPs, + parameter.paddrBits: Int + ) + ) + val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen))) + val t1 = Option.when(parameter.usingT1)(new RocketCoreToT1(parameter.xLen, parameter.vLen)) + val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch)) + val cease = Output(Bool()) + val wfi = Output(Bool()) + val traceStall = Input(Bool()) + val rocketProbe = Output(Probe(new RocketProbe(parameter))) +} + +/** The [[Rocket]] is the next version of the RocketCore; + * all of its microarchitecture comes from the original RocketCore. + * The development of [[Rocket]] happens in the T1 project. + * It will be moved to a standalone package once it has been verified. + * + * Here are some basic ideas behind [[Rocket]]: + * - it should be linkable by providing a verification constraint to other components. + * - only [[RocketParameter]] and [[RocketInterface]] are exposed to users; all internal APIs are subject to change. + * - There is no coherence support for [[Rocket]] until chipsalliance has the CHI interconnect and cache IP. + * - The in-tile components contain the Frontend, HellaCache, FPU, and T1, but the memory subsystem only supports AXI. + */ +@instantiable +class Rocket(val parameter: RocketParameter) + extends FixedIORawModule(new RocketInterface(parameter)) + with SerializableModule[RocketParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + val csr: Instance[CSR] = Instantiate(new CSR(parameter.csrParameter)) + val decoder: Instance[Decoder] = Instantiate(new Decoder(parameter.decoderParameter)) + val instructionBuffer: Instance[IBuf] = Instantiate(new IBuf(parameter.iBufParameter)) + val breakpointUnit: Instance[BreakpointUnit] = Instantiate(new BreakpointUnit(parameter.breakpointUnitParameter)) + val alu: Instance[ALU] = Instantiate(new ALU(parameter.aluParameter)) + val mulDiv: Instance[MulDiv] = Instantiate(new MulDiv(parameter.mulDivParameter)) + val mul: Option[Instance[PipelinedMultiplier]] = parameter.mulParameter.map(p => Instantiate(new PipelinedMultiplier(p))) + + // compatibility mode.
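+ // Editor's note: the rocketParams object below keeps a few legacy rocket-chip parameter names alive for ported code, and the M_* constants that follow mirror rocket-chip's MemoryOpConstants encodings (e.g. M_XRD is an integer load, the M_XA_* codes are AMOs) so HellaCache requests stay bit-compatible with upstream.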
+ object rocketParams { + def clockGate = parameter.clockGate + def lgPauseCycles = 5 + }; + def M_XRD = "b00000".U // int load + def M_XWR = "b00001".U // int store + def M_PFR = "b00010".U // prefetch with intent to read + def M_PFW = "b00011".U // prefetch with intent to write + def M_XA_SWAP = "b00100".U + def M_FLUSH_ALL = "b00101".U // flush all lines + def M_XLR = "b00110".U + def M_XSC = "b00111".U + def M_XA_ADD = "b01000".U + def M_XA_XOR = "b01001".U + def M_XA_OR = "b01010".U + def M_XA_AND = "b01011".U + def M_XA_MIN = "b01100".U + def M_XA_MAX = "b01101".U + def M_XA_MINU = "b01110".U + def M_XA_MAXU = "b01111".U + def M_PWR = "b10001".U // partial (masked) store + def M_SFENCE = "b10100".U // SFENCE.VMA + def M_HFENCEV = "b10101".U // HFENCE.VVMA + def M_HFENCEG = "b10110".U // HFENCE.GVMA + def M_WOK = "b10111".U // check write permissions but don't perform a write + def M_HLVX = "b10000".U // HLVX instruction + + def lgNXRegs = parameter.lgNXRegs + def coreDataBytes = parameter.coreDataBytes + def regAddrMask: Int = (1 << lgNXRegs) - 1 + def xLen: Int = parameter.xLen + def fLen: Option[Int] = parameter.fLen + def vaddrBits: Int = parameter.vaddrBits + def vaddrBitsExtended: Int = parameter.vaddrBitsExtended + def btbEntries: Int = parameter.btbEntries + def bhtHistoryLength: Option[Int] = parameter.bhtHistoryLength + def bhtCounterLength: Option[Int] = parameter.bhtCounterLength + def nBreakpoints: Int = parameter.nBreakpoints + def usingAtomics: Boolean = parameter.usingAtomics + def usingMulDiv: Boolean = parameter.usingMulDiv + def usingVector: Boolean = parameter.usingVector + def pipelinedMul: Boolean = parameter.pipelinedMul + def usingCompressed: Boolean = parameter.usingCompressed + def usingFPU: Boolean = parameter.usingFPU + def usingVM: Boolean = parameter.usingVM + def fastLoadByte: Boolean = parameter.fastLoadByte + def fastLoadWord: Boolean = parameter.fastLoadWord + def hypervisorExtraAddrBits: Int = parameter.hypervisorExtraAddrBits + def usingHypervisor: Boolean = parameter.usingHypervisor + def flushOnFenceI: Boolean = parameter.flushOnFenceI + def usingBTB: Boolean = parameter.usingBTB + def coreInstBytes: Int = parameter.coreInstBytes + def fetchWidth: Int = parameter.fetchWidth + def minFLen: Int = parameter.minFLen.getOrElse(0) + def hasDataECC: Boolean = parameter.hasDataECC + + // Signals that live outside the gated clock domain. + + val longLatencyStall = Reg(Bool()) + val idRegPause = Reg(Bool()) + val imemMightRequestReg = Reg(Bool()) + val clockEnable = WireDefault(true.B) + val clockEnableReg = RegInit(true.B) + val gatedClock = + Option.when(rocketParams.clockGate)(ClockGate(io.clock, clockEnable)).getOrElse(io.clock) + + csr.io.clock := gatedClock + csr.io.reset := io.reset + instructionBuffer.io.clock := gatedClock + instructionBuffer.io.reset := io.reset + mulDiv.io.clock := gatedClock + mulDiv.io.reset := io.reset + mul.foreach(_.io.clock := gatedClock) + mul.foreach(_.io.reset := io.reset) + // leaving gated-clock domain + val gatedDomain = withClock(gatedClock)(new Gated) + + class Gated { + // performance counters + def pipelineIDToWB[T <: Data](x: T): T = RegEnable(RegEnable(RegEnable(x, !ctrlKilled), exPcValid), memPcValid) + + // RF is not a Module. + val rf = new RegFile(regAddrMask, xLen) + + // wire definitions.
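+ // Editor's note on the naming convention below: exReg* registers latch ID-stage results into EX, memReg* latch EX results into MEM, and wbReg* latch MEM results into WB; the takePcMem/takePcWb wires request a PC redirect from those later stages.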
+ + val idDecodeOutput: DecodeBundle = Wire(chiselTypeOf(decoder.io.output)) + + val exRegExceptionInterrupt: Bool = Reg(Bool()) + val exRegException: Bool = Reg(Bool()) + val exRegValid: Bool = Reg(Bool()) + val exRegRVC: Bool = Reg(Bool()) + val exRegBTBResponse: BTBResp = Reg(new BTBResp(vaddrBits, btbEntries, fetchWidth, bhtHistoryLength, bhtCounterLength)) + val exRegFlushPipe: Bool = Reg(Bool()) + val exRegLoadUse: Bool = Reg(Bool()) + val exRegCause: UInt = Reg(UInt()) + val exRegReplay: Bool = Reg(Bool()) + val exRegPC: UInt = Reg(UInt()) + // TODO: add real width here. + val exRegMemSize: UInt = Reg(UInt()) + // Option.when(usingHypervisor) + val exRegHLS: Bool = Reg(Bool()) + val exRegInstruction: UInt = Reg(UInt()) + val exRegRawInstruction: UInt = Reg(UInt()) + // TODO: what's this? + val exRegWphit: Vec[Bool] = Reg(Vec(nBreakpoints, Bool())) + val exRegDecodeOutput: DecodeBundle = Reg(chiselTypeOf(decoder.io.output)) + + val memRegExceptionInterrupt = Reg(Bool()) + val memRegValid = Reg(Bool()) + val memRegRVC = Reg(Bool()) + val memRegBTBResponse = Reg(new BTBResp( + vaddrBits, + btbEntries, + fetchWidth, + bhtHistoryLength, + bhtCounterLength + )) + val memRegException = Reg(Bool()) + val memRegReplay = Reg(Bool()) + val memRegFlushPipe = Reg(Bool()) + val memRegCause = Reg(UInt()) + val memRegSlowBypass = Reg(Bool()) + val memRegLoad = Reg(Bool()) + val memRegStore = Reg(Bool()) + val memRegSfence = Reg(Bool()) + val memRegPc = Reg(UInt()) + val memRegInstruction = Reg(UInt()) + val memRegMemSize = Reg(UInt()) + val memRegDecodeOutput: DecodeBundle = Reg(chiselTypeOf(decoder.io.output)) + + /** virtualization mode? */ + val memRegHlsOrDv = Reg(Bool()) + val memRegRawInstruction = Reg(UInt()) + val memRegWdata = Reg(UInt()) + val memRegRS2 = Reg(UInt()) + val memBranchTaken = Reg(Bool()) + val takePcMem = Wire(Bool()) + val memRegWphit = Reg(Vec(nBreakpoints, Bool())) + + val wbRegValid = Reg(Bool()) + val wbRegException = Reg(Bool()) + val wbRegReplay = Reg(Bool()) + val wbRegFlushPipe = Reg(Bool()) + val wbRegCause = Reg(UInt()) + val wbRegSfence = Reg(Bool()) + val wbRegPc = Reg(UInt()) + val wbRegDecodeOutput: DecodeBundle = Reg(chiselTypeOf(decoder.io.output)) + val wbRegMemSize = Reg(UInt()) + val wbRegHlsOrDv = Reg(Bool()) + val wbRegHfenceV = Reg(Bool()) + val wbRegHfenceG = Reg(Bool()) + val wbRegInstruction = Reg(UInt()) + val wbRegRawInstruction = Reg(UInt()) + val wbRegWdata = Reg(UInt()) + val wbRegRS2 = Reg(UInt()) + val wbRegWphit = Reg(Vec(nBreakpoints, Bool())) + val takePcWb = Wire(Bool()) + + val takePcMemWb = takePcWb || takePcMem + val takePc = takePcMemWb + + // From IBUF to ID + instructionBuffer.io.imem <> io.imem.resp + val instructionBufferOut = instructionBuffer.io.inst.head + // TODO: do these really have meaning? I don't think so :( + val idExpandedInstruction: ExpandedInstruction = instructionBufferOut.bits.inst + val idRawInstruction: UInt = instructionBufferOut.bits.raw + val idInstruction: UInt = idExpandedInstruction.bits + idDecodeOutput := decoder.io.output + instructionBuffer.io.kill := takePc + // 5. Instruction goes to Rocket Decoder + decoder.io.instruction := idInstruction + + // Optional circuit: only needed for RVE.
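+ // Editor's note: decodeReg below splits a raw 5-bit register specifier into (illegal, address). With RVE (lgNXRegs == 4) a set top bit flags x16-x31 as illegal; with lgNXRegs == 5 the illegal bit is constantly false. For example, under RVE the specifier for x17 (0b10001) decodes to (true.B, "b0001".U).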
+ def decodeReg(x: UInt): (Bool, UInt) = ((if (x.getWidth - 1 < lgNXRegs) 0.U else x(x.getWidth - 1, lgNXRegs)).asBool, x(lgNXRegs - 1, 0)) + val (idRaddr3Illegal: Bool, idRaddr3: UInt) = decodeReg(idExpandedInstruction.rs3) + val (idRaddr2Illegal: Bool, idRaddr2: UInt) = decodeReg(idExpandedInstruction.rs2) + val (idRaddr1Illegal: Bool, idRaddr1: UInt) = decodeReg(idExpandedInstruction.rs1) + val (idWaddrIllegal: Bool, idWaddr: UInt) = decodeReg(idExpandedInstruction.rd) + + val idLoadUse: Bool = Wire(Bool()) + val idRegFence: Bool = RegInit(false.B) + // TODO: T1 needs to access RS1 and RS2 under some instructions. + // FP goes to a different path, parameter.decoderParameter.rfs1 is never used... + val idRen: Seq[Bool] = IndexedSeq(idDecodeOutput(parameter.decoderParameter.rxs1), idDecodeOutput(parameter.decoderParameter.rxs2)) + val idRaddr: Seq[UInt] = IndexedSeq(idRaddr1, idRaddr2) + // 6. Read RF out. + val idRs: Seq[UInt] = idRaddr.map(rf.read) + // the instruction gets killed at the exec stage if true. + val ctrlKilled: Bool = Wire(Bool()) + + // TODO: additional decode out? + + def isOneOf(x:UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + + val idCsrEn: Bool = isOneOf(idDecodeOutput(parameter.decoderParameter.csr), Seq(parameter.csrParameter.S, parameter.csrParameter.C, parameter.csrParameter.W)) + val idSystemInstruction: Bool = idDecodeOutput(parameter.decoderParameter.csr) === parameter.csrParameter.I + val idCsrRen: Bool = isOneOf(idDecodeOutput(parameter.decoderParameter.csr), Seq(parameter.csrParameter.S, parameter.csrParameter.C)) && idExpandedInstruction.rs1 === 0.U + val idCsr = + Mux(idSystemInstruction && idDecodeOutput(parameter.decoderParameter.mem), parameter.csrParameter.N, Mux(idCsrRen, parameter.csrParameter.R, idDecodeOutput(parameter.decoderParameter.csr))) + val idCsrFlush = + idSystemInstruction || + (idCsrEn && !idCsrRen && csr.io.decode(0).writeFlush) || + Option.when(parameter.usingVector)(idDecodeOutput(parameter.decoderParameter.vectorCSR)).getOrElse(false.B) + val idRfIllegal: Bool = + idRaddr2Illegal && idDecodeOutput(parameter.decoderParameter.rxs2) || + idRaddr1Illegal && idDecodeOutput(parameter.decoderParameter.rxs1) || + idWaddrIllegal && idDecodeOutput(parameter.decoderParameter.wxd) + val idCsrIllegalRW: Bool = + idCsrEn && (csr.io.decode(0).readIllegal || !idCsrRen && csr.io.decode(0).writeIllegal) + val idSystemIllegal: Bool = + !instructionBufferOut.bits.rvc && (idSystemInstruction && csr.io.decode(0).systemIllegal) + + val idAtomicIllegal: Option[Bool] = + Option.when(usingAtomics)(idDecodeOutput(parameter.decoderParameter.amo) && !csr.io.status.isa('a' - 'a')) + val idMulDivIllegal: Option[Bool] = + Option.when(usingMulDiv)( + Option.when(pipelinedMul)(idDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B) || + idDecodeOutput(parameter.decoderParameter.div) && !csr.io.status.isa('m' - 'a') + ) + val idCompressIllegal: Option[Bool] = + Option.when(usingCompressed)(instructionBufferOut.bits.rvc && !csr.io.status.isa('c' - 'a')) + val idFpIllegal: Option[Bool] = + io.fpu.map(fpu => idDecodeOutput(parameter.decoderParameter.fp) && (csr.io.decode(0).fpIllegal || fpu.illegal_rm)) + val idDpIllegal: Option[Bool] = Option.when(usingFPU)(idDecodeOutput(parameter.decoderParameter.dp) && !csr.io.status.isa('d' - 'a')) + + // TODO: vector illegal: + // - vector is not enabled but a vector instruction is decoded.
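+ // Editor's note: a minimal sketch of that missing check, mirroring idMulDivIllegal above; the `vector` decode field is hypothetical (it does not exist in decoderParameter today): + // val idVectorIllegal: Option[Bool] = + // Option.when(usingVector)(idDecodeOutput(parameter.decoderParameter.vector) && !csr.io.status.isa('v' - 'a'))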
+ val idIllegalInstruction: Bool = + !idDecodeOutput(parameter.decoderParameter.isLegal) || + idRfIllegal || + idCsrIllegalRW || + idSystemIllegal || + idMulDivIllegal.getOrElse(false.B) || + idAtomicIllegal.getOrElse(false.B) || + idFpIllegal.getOrElse(false.B) || + idDpIllegal.getOrElse(false.B) || + idCompressIllegal.getOrElse(false.B) + val idVirtualInstruction: Bool = + idDecodeOutput(parameter.decoderParameter.isLegal) && + ( + (idCsrEn && + !(!idCsrRen && csr.io.decode(0).writeIllegal) && + csr.io.decode(0).virtualAccessIllegal) || ( + !instructionBufferOut.bits.rvc && + idSystemInstruction && + csr.io.decode(0).virtualSystemIllegal + ) + ) + + // stall decode for fences (now, for AMO.rl; later, for AMO.aq and FENCE) + val idAmoAquire: Bool = idInstruction(26) + val idAmoRelease: Bool = idInstruction(25) + // TODO: what's this? + val idFenceSucc: UInt = idInstruction(23, 20) + val idFenceNext: Bool = idDecodeOutput(parameter.decoderParameter.fence) || idDecodeOutput(parameter.decoderParameter.amo) && idAmoAquire + val idMemoryBusy: Bool = !io.dmem.ordered || io.dmem.req.valid + val idDoFence = + idMemoryBusy && + (idDecodeOutput(parameter.decoderParameter.amo) && idAmoRelease || + idDecodeOutput(parameter.decoderParameter.fenceI) || + idRegFence && idDecodeOutput(parameter.decoderParameter.mem)) + + // TODO: if vector is non-empty, don't take breakpoint. + breakpointUnit.io.status := csr.io.status + breakpointUnit.io.bp := csr.io.bp + breakpointUnit.io.pc := instructionBuffer.io.pc + breakpointUnit.io.ea := memRegWdata + breakpointUnit.io.mcontext := csr.io.mcontext + breakpointUnit.io.scontext := csr.io.scontext + + val idException0 = instructionBufferOut.bits.xcpt0 + val idException1 = instructionBufferOut.bits.xcpt1 + val (idException, idCause) = checkExceptions( + List( + (csr.io.interrupt, csr.io.interruptCause), + (breakpointUnit.io.debug_if, parameter.csrParameter.debugTriggerCause.U), + (breakpointUnit.io.xcpt_if, Causes.breakpoint.U), + (idException0.pf, Causes.fetch_page_fault.U), + (idException0.gf, Causes.fetch_guest_page_fault.U), + (idException0.ae, Causes.fetch_access.U), + (idException1.pf, Causes.fetch_page_fault.U), + (idException1.gf, Causes.fetch_guest_page_fault.U), + (idException1.ae, Causes.fetch_access.U), + (idVirtualInstruction, Causes.virtual_instruction.U), + (idIllegalInstruction, Causes.illegal_instruction.U) + ) + ) + + val idCoverCauses: Seq[(Int, String)] = List( + (parameter.csrParameter.debugTriggerCause, "DEBUG_TRIGGER"), + (Causes.breakpoint, "BREAKPOINT"), + (Causes.fetch_access, "FETCH_ACCESS"), + (Causes.illegal_instruction, "ILLEGAL_INSTRUCTION") + ) ++ Option.when(usingVM)((Causes.fetch_page_fault, "FETCH_PAGE_FAULT")) + + // Bypass signals + val dcacheBypassData: UInt = + if (fastLoadByte) io.dmem.resp.bits.data(xLen - 1, 0) + else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass(xLen - 1, 0) + else wbRegWdata + // detect bypass opportunities + val exWaddr: UInt = exRegInstruction(11, 7) & regAddrMask.U + val memWaddr: UInt = memRegInstruction(11, 7) & regAddrMask.U + val wbWaddr: UInt = wbRegInstruction(11, 7) & regAddrMask.U + val bypassSources: Seq[(Bool, UInt, UInt)] = IndexedSeq( + (true.B, 0.U, 0.U), // treat reading x0 as a bypass + (exRegValid && exRegDecodeOutput(parameter.decoderParameter.wxd), exWaddr, memRegWdata), + (memRegValid && memRegDecodeOutput(parameter.decoderParameter.wxd) && !memRegDecodeOutput(parameter.decoderParameter.mem), memWaddr, wbRegWdata), + (memRegValid && 
memRegDecodeOutput(parameter.decoderParameter.wxd), memWaddr, dcacheBypassData) + ) + val idBypassSources: Seq[Seq[Bool]] = idRaddr.map(raddr => bypassSources.map(s => s._1 && s._2 === raddr)) + + // execute stage + val bypassMux: Vec[UInt] = VecInit(bypassSources.map(_._3)) + val exRegRsBypass: Vec[Bool] = Reg(Vec(idRaddr.size, Bool())) + val exRegRsLSB: Vec[UInt] = Reg(Vec(idRaddr.size, UInt(log2Ceil(bypassSources.size).W))) + val exRegRsMSB: Vec[UInt] = Reg(Vec(idRaddr.size, UInt())) + val exRs: Seq[UInt] = Seq.tabulate(idRaddr.size)(i => + Mux(exRegRsBypass(i), bypassMux(exRegRsLSB(i)), Cat(exRegRsMSB(i), exRegRsLSB(i))) + ) + val exImm: SInt = ImmGen(exRegDecodeOutput(parameter.decoderParameter.selImm), exRegInstruction) + + def A1_RS1 = 1.U(2.W) + def A1_PC = 2.U(2.W) + + def A2_ZERO = 0.U(2.W) + def A2_SIZE = 1.U(2.W) + def A2_RS2 = 2.U(2.W) + def A2_IMM = 3.U(2.W) + + val exOp1: SInt = + MuxLookup(exRegDecodeOutput(parameter.decoderParameter.selAlu1), 0.S)(Seq(A1_RS1 -> exRs(0).asSInt, A1_PC -> exRegPC.asSInt)) + val exOp2: SInt = MuxLookup(exRegDecodeOutput(parameter.decoderParameter.selAlu2), 0.S)( + Seq(A2_RS2 -> exRs(1).asSInt, A2_IMM -> exImm, A2_SIZE -> Mux(exRegRVC, 2.S, 4.S)) + ) + + alu.io.dw := exRegDecodeOutput(parameter.decoderParameter.aluDoubleWords) + alu.io.fn := exRegDecodeOutput(parameter.decoderParameter.aluFn) + alu.io.in2 := exOp2.asUInt + alu.io.in1 := exOp1.asUInt + + // multiplier and divider + // TODO: waive them if !usingMulDiv + mulDiv.io.req.valid := exRegValid && Option.when(usingMulDiv)(exRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) + mulDiv.io.req.bits.dw := exRegDecodeOutput(parameter.decoderParameter.aluDoubleWords) + mulDiv.io.req.bits.fn := exRegDecodeOutput(parameter.decoderParameter.aluFn) + mulDiv.io.req.bits.in1 := exRs(0) + mulDiv.io.req.bits.in2 := exRs(1) + mulDiv.io.req.bits.tag := exWaddr + mul.foreach { m => + m.io.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mul) + m.io.req.bits := mulDiv.io.req.bits + } + + exRegValid := !ctrlKilled + exRegReplay := !takePc && instructionBufferOut.valid && instructionBufferOut.bits.replay + exRegException := !ctrlKilled && idException + exRegExceptionInterrupt := !takePc && instructionBufferOut.valid && csr.io.interrupt + + // ID goes to EX + when(!ctrlKilled) { + exRegDecodeOutput := idDecodeOutput + exRegRVC := instructionBufferOut.bits.rvc + exRegDecodeOutput(parameter.decoderParameter.csr) := idCsr + when(idDecodeOutput(parameter.decoderParameter.fence) && idFenceSucc === 0.U) { idRegPause := true.B } + when(idFenceNext) { idRegFence := true.B } + when(idException) { // pass PC down ALU writeback pipeline for badaddr + exRegDecodeOutput(parameter.decoderParameter.aluFn) := parameter.aluParameter.FN_ADD + exRegDecodeOutput(parameter.decoderParameter.aluDoubleWords) := true.B + exRegDecodeOutput(parameter.decoderParameter.selAlu1) := A1_RS1 // badaddr := instruction + exRegDecodeOutput(parameter.decoderParameter.selAlu2) := A2_ZERO + when(idException1.asUInt.orR) { // badaddr := PC+2 + exRegDecodeOutput(parameter.decoderParameter.selAlu1) := A1_PC + exRegDecodeOutput(parameter.decoderParameter.selAlu2) := A2_SIZE + exRegRVC := true.B + } + when(breakpointUnit.io.xcpt_if || idException0.asUInt.orR) { // badaddr := PC + exRegDecodeOutput(parameter.decoderParameter.selAlu1) := A1_PC + exRegDecodeOutput(parameter.decoderParameter.selAlu2) := A2_ZERO + } + } + exRegFlushPipe := idDecodeOutput(parameter.decoderParameter.fenceI) || idCsrFlush + exRegLoadUse 
:= idLoadUse + + exRegHLS := + usingHypervisor.B && + idSystemInstruction && + isOneOf(idDecodeOutput(parameter.decoderParameter.memCommand), Seq(M_XRD, M_XWR, M_HLVX)) + exRegMemSize := Mux(usingHypervisor.B && idSystemInstruction, idInstruction(27, 26), idInstruction(13, 12)) + when(isOneOf(idDecodeOutput(parameter.decoderParameter.memCommand), Seq(M_SFENCE, M_HFENCEV, M_HFENCEG, M_FLUSH_ALL)) ) { + exRegMemSize := Cat(idRaddr2 =/= 0.U, idRaddr1 =/= 0.U) + } + when(idDecodeOutput(parameter.decoderParameter.memCommand) === M_SFENCE && csr.io.status.v) { + exRegDecodeOutput(parameter.decoderParameter.memCommand) := M_HFENCEV + } + + if (flushOnFenceI) { + when(idDecodeOutput(parameter.decoderParameter.fenceI)) { + exRegMemSize := 0.U + } + } + + Seq.tabulate(idRaddr.size) { i => + val doBypass = idBypassSources(i).reduce(_ || _) + val bypassSource = PriorityEncoder(idBypassSources(i)) + exRegRsBypass(i) := doBypass + exRegRsLSB(i) := bypassSource + when(idRen(i) && !doBypass) { + exRegRsLSB(i) := idRs(i)(log2Ceil(bypassSources.size) - 1, 0) + exRegRsMSB(i) := idRs(i) >> log2Ceil(bypassSources.size) + } + } + when(idIllegalInstruction || idVirtualInstruction) { + val inst = Mux(instructionBufferOut.bits.rvc, idRawInstruction(15, 0), idRawInstruction) + exRegRsBypass(0) := false.B + exRegRsLSB(0) := inst(log2Ceil(bypassSources.size) - 1, 0) + exRegRsMSB(0) := inst >> log2Ceil(bypassSources.size) + } + } + // ID goes to EX but with interrupt or replay + when(!ctrlKilled || csr.io.interrupt || instructionBufferOut.bits.replay) { + exRegCause := idCause + exRegInstruction := idInstruction + exRegRawInstruction := idRawInstruction + exRegPC := instructionBuffer.io.pc + exRegBTBResponse := instructionBuffer.io.btb_resp + exRegWphit := breakpointUnit.io.bpwatch.map { bpw => bpw.ivalid(0) } + } + // replay inst in ex stage? 
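+ // Summary of the replay logic below: an instruction replays from EX on a structural hazard (D$ request port not ready, or the divider unable to accept a request) or on a load-use dependence whose load missed; the replay propagates down the pipe and re-fetches from wbRegPc rather than stalling earlier stages.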
+ val exPcValid: Bool = exRegValid || exRegReplay || exRegExceptionInterrupt + val wbDcacheMiss: Bool = wbRegDecodeOutput(parameter.decoderParameter.mem) && !io.dmem.resp.valid + val replayExStructural: Bool = exRegDecodeOutput(parameter.decoderParameter.mem) && !io.dmem.req.ready || Option + .when(usingMulDiv)(exRegDecodeOutput(parameter.decoderParameter.div)) + .getOrElse(false.B) && !mulDiv.io.req.ready + val replayExLoadUse: Bool = wbDcacheMiss && exRegLoadUse + val replayEx: Bool = exRegReplay || (exRegValid && (replayExStructural || replayExLoadUse)) + val ctrlKillx: Bool = takePcMemWb || replayEx || !exRegValid + // detect 2-cycle load-use delay for LB/LH/SC + val exSlowBypass: Bool = exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_XSC || exRegMemSize < 2.U + val exSfence: Bool = + usingVM.B && + exRegDecodeOutput(parameter.decoderParameter.mem) && + (exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_SFENCE || + exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEV || + exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEG) + + val (exException: Bool, exCause: UInt) = checkExceptions( + List((exRegExceptionInterrupt || exRegException, exRegCause)) + ) + val exCoverCauses: Seq[(Int, String)] = idCoverCauses +// coverExceptions(exException, exCause, "EXECUTE", exCoverCauses) + + // memory stage + val memPcValid: Bool = memRegValid || memRegReplay || memRegExceptionInterrupt + val memBranchTarget: SInt = memRegPc.asSInt + + Mux( + memRegDecodeOutput(parameter.decoderParameter.isBranch) && memBranchTaken, + ImmGen(ImmGen.IMM_SB, memRegInstruction), + Mux(memRegDecodeOutput(parameter.decoderParameter.isJal), ImmGen(ImmGen.IMM_UJ, memRegInstruction), Mux(memRegRVC, 2.S, 4.S)) + ) + val memNextPC: UInt = (Mux( + memRegDecodeOutput(parameter.decoderParameter.isJalr) || memRegSfence, + encodeVirtualAddress(memRegWdata, memRegWdata).asSInt, + memBranchTarget + ) & (-2).S).asUInt + val memWrongNpc: Bool = + Mux( + exPcValid, + memNextPC =/= exRegPC, + Mux( + instructionBufferOut.valid || instructionBuffer.io.imem.valid, + memNextPC =/= instructionBuffer.io.pc, + true.B + ) + ) + val memNpcMisaligned: Bool = !csr.io.status.isa('c' - 'a') && memNextPC(1) && !memRegSfence + val memIntWdata: UInt = Mux( + !memRegException && (memRegDecodeOutput(parameter.decoderParameter.isJalr) ^ memNpcMisaligned), + memBranchTarget, + memRegWdata.asSInt + ).asUInt + val memCfi: Bool = + memRegDecodeOutput(parameter.decoderParameter.isBranch) || memRegDecodeOutput(parameter.decoderParameter.isJalr) || memRegDecodeOutput(parameter.decoderParameter.isJal) + val memCfiTaken: Bool = + (memRegDecodeOutput(parameter.decoderParameter.isBranch) && memBranchTaken) || memRegDecodeOutput( + parameter.decoderParameter.isJalr + ) || memRegDecodeOutput(parameter.decoderParameter.isJal) + val memDirectionMisprediction: Bool = + memRegDecodeOutput(parameter.decoderParameter.isBranch) && memBranchTaken =/= (usingBTB.B && memRegBTBResponse.taken) + val memMisprediction: Bool = if (usingBTB) memWrongNpc else memCfiTaken + takePcMem := memRegValid && !memRegException && (memMisprediction || memRegSfence) + + memRegValid := !ctrlKillx + memRegReplay := !takePcMemWb && replayEx + memRegException := !ctrlKillx && exException + memRegExceptionInterrupt := !takePcMemWb && exRegExceptionInterrupt + + // on pipeline flushes, cause mem_npc to hold the sequential npc, which + // will drive the W-stage npc mux + when(memRegValid && memRegFlushPipe) { + memRegSfence := false.B + 
}.elsewhen(exPcValid) { + memRegDecodeOutput := exRegDecodeOutput + memRegRVC := exRegRVC + + def isAMOLogical(cmd: UInt) = isOneOf(cmd, Seq(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND)) + def isAMOArithmetic(cmd: UInt) = isOneOf(cmd, Seq(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU)) + def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd) + def isRead(cmd: UInt) = isOneOf(cmd, Seq(M_XRD, M_HLVX, M_XLR, M_XSC)) || isAMO(cmd) + def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd) + + memRegLoad := exRegDecodeOutput(parameter.decoderParameter.mem) && isRead(exRegDecodeOutput(parameter.decoderParameter.memCommand)) + memRegStore := exRegDecodeOutput(parameter.decoderParameter.mem) && isWrite(exRegDecodeOutput(parameter.decoderParameter.memCommand)) + memRegSfence := exSfence + memRegBTBResponse := exRegBTBResponse + memRegFlushPipe := exRegFlushPipe + memRegSlowBypass := exSlowBypass + memRegWphit := exRegWphit + + memRegCause := exCause + memRegInstruction := exRegInstruction + memRegRawInstruction := exRegRawInstruction + memRegMemSize := exRegMemSize + memRegHlsOrDv := io.dmem.req.bits.dv + memRegPc := exRegPC + // IDecode ensured they are 1H + memRegWdata := alu.io.out + memBranchTaken := alu.io.cmp_out + + when( + exRegDecodeOutput(parameter.decoderParameter.rxs2) && (exRegDecodeOutput(parameter.decoderParameter.mem) || exSfence) + ) { + val size = exRegMemSize + memRegRS2 := new StoreGen(size, 0.U, exRs(1), coreDataBytes).data + }.elsewhen(exRegDecodeOutput(parameter.decoderParameter.rxs2) && Option.when(usingVector)(exRegDecodeOutput(parameter.decoderParameter.vector)).getOrElse(false.B)) { + // for setvl + memRegRS2 := exRs(1) + } + when(exRegDecodeOutput(parameter.decoderParameter.isJalr) && csr.io.status.debug) { + // flush I$ on D-mode JALR to effect uncached fetch without D$ flush + memRegDecodeOutput(parameter.decoderParameter.fenceI) := true.B + memRegFlushPipe := true.B + } + } + + val memBreakpoint = (memRegLoad && breakpointUnit.io.xcpt_ld) || (memRegStore && breakpointUnit.io.xcpt_st) + val memDebugBreakpoint = (memRegLoad && breakpointUnit.io.debug_ld) || (memRegStore && breakpointUnit.io.debug_st) + val (memLoadStoreException, memLoadStoreCause) = checkExceptions( + List((memDebugBreakpoint, parameter.csrParameter.debugTriggerCause.U), (memBreakpoint, Causes.breakpoint.U)) + ) + + val (memException, memCause) = checkExceptions( + List( + (memRegExceptionInterrupt || memRegException, memRegCause), + (memRegValid && memNpcMisaligned, Causes.misaligned_fetch.U), + (memRegValid && memLoadStoreException, memLoadStoreCause) + ) + ) + +// val memCoverCauses = (exCoverCauses ++ List( +// (CSR.debugTriggerCause, "DEBUG_TRIGGER"), +// (Causes.breakpoint, "BREAKPOINT"), +// (Causes.misaligned_fetch, "MISALIGNED_FETCH") +// )).distinct +// coverExceptions(memException, memCause, "MEMORY", memCoverCauses) + + val dcacheKillMem = + memRegValid && memRegDecodeOutput(parameter.decoderParameter.wxd) && io.dmem.replay_next // structural hazard on writeback port + // TODO: vectorKillMem? 
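+ // Kill vs. replay in the M stage below: a kill (killmCommon / ctrlKillm) squashes the instruction, while a replay additionally re-executes it. dcacheKillMem feeds both, since losing the D$ writeback port to io.dmem.replay_next is a structural loss, not a fault.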
+ val fpuKillMem = io.fpu.map(fpu => memRegValid && memRegDecodeOutput(parameter.decoderParameter.fp) && fpu.nack_mem) + val replayMem = dcacheKillMem || memRegReplay || fpuKillMem.getOrElse(false.B) + val killmCommon = dcacheKillMem || takePcWb || memRegException || !memRegValid + mulDiv.io.kill := killmCommon && RegNext(mulDiv.io.req.fire) + val ctrlKillm = killmCommon || memException || fpuKillMem.getOrElse(false.B) + + // writeback stage + wbRegValid := !ctrlKillm + wbRegReplay := replayMem && !takePcWb + wbRegException := memException && !takePcWb + wbRegFlushPipe := !ctrlKillm && memRegFlushPipe + when(memPcValid) { + wbRegDecodeOutput := memRegDecodeOutput + wbRegSfence := memRegSfence + wbRegWdata := io.fpu + .map(fpu => + Mux( + !memRegException && memRegDecodeOutput(parameter.decoderParameter.fp) && memRegDecodeOutput(parameter.decoderParameter.wxd), + fpu.toint_data, + memIntWdata + ) + ) + .getOrElse(memIntWdata) + when(memRegSfence || Option.when(usingVector)(memRegDecodeOutput(parameter.decoderParameter.vector)).getOrElse(false.B)) { + wbRegRS2 := memRegRS2 + } + wbRegCause := memCause + wbRegInstruction := memRegInstruction + wbRegRawInstruction := memRegRawInstruction + wbRegMemSize := memRegMemSize + wbRegHlsOrDv := memRegHlsOrDv + wbRegHfenceV := memRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEV + wbRegHfenceG := memRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEG + wbRegPc := memRegPc + + wbRegWphit.lazyZip(memRegWphit).lazyZip(breakpointUnit.io.bpwatch).foreach {case (wbRegWphit, memRegWphit, bpw) => + wbRegWphit := memRegWphit || ((bpw.rvalid(0) && memRegLoad) || (bpw.wvalid(0) && memRegStore)) + } + } + + val (wbException, wbCause) = checkExceptions( + List( + (wbRegException, wbRegCause), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.pf.st, Causes.store_page_fault.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.pf.ld, Causes.load_page_fault.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.gf.st, Causes.store_guest_page_fault.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.gf.ld, Causes.load_guest_page_fault.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ae.st, Causes.store_access.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ae.ld, Causes.load_access.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ma.st, Causes.misaligned_store.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ma.ld, Causes.misaligned_load.U) + ) + ) + + val wbCoverCauses = Seq( + (Causes.misaligned_store, "MISALIGNED_STORE"), + (Causes.misaligned_load, "MISALIGNED_LOAD"), + (Causes.store_access, "STORE_ACCESS"), + (Causes.load_access, "LOAD_ACCESS") + ) ++ + Option + .when(usingVM)( + Seq( + (Causes.store_page_fault, "STORE_PAGE_FAULT"), + (Causes.load_page_fault, "LOAD_PAGE_FAULT") + ) + ) + .getOrElse(Seq()) ++ + Option + .when(usingHypervisor)( + Seq( + (Causes.store_guest_page_fault, "STORE_GUEST_PAGE_FAULT"), + (Causes.load_guest_page_fault, "LOAD_GUEST_PAGE_FAULT") + ) + ) + .getOrElse(Seq()) +// coverExceptions(wbException, wbCause, "WRITEBACK", wbCoverCauses) + + val wbPcValid: Bool = wbRegValid || wbRegReplay || wbRegException + val wbWxd: Bool = wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.wxd) + val 
wbSetSboard: Bool = + wbDcacheMiss || + Option.when(usingMulDiv)(wbRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) || + Option + .when(usingVector) { + // 8. set Int scoreboard + wbRegDecodeOutput(parameter.decoderParameter.wxd) && wbRegDecodeOutput(parameter.decoderParameter.vector) && !wbRegDecodeOutput(parameter.decoderParameter.vectorCSR) + } + .getOrElse(false.B) + val replayWbCommon: Bool = io.dmem.s2_nack || wbRegReplay + val replayWbCsr: Bool = wbRegValid && csr.io.rwStall + val replayWb: Bool = replayWbCommon || replayWbCsr + takePcWb := replayWb || wbException || csr.io.eret || wbRegFlushPipe + + // writeback arbitration + val dmemResponseXpu: Bool = !io.dmem.resp.bits.tag(0).asBool + val dmemResponseFpu: Bool = io.dmem.resp.bits.tag(0).asBool + val dmemResponseWaddr: UInt = io.dmem.resp.bits.tag(5, 1) + val dmemResponseValid: Bool = io.dmem.resp.valid && io.dmem.resp.bits.has_data + val dmemResponseReplay: Bool = dmemResponseValid && io.dmem.resp.bits.replay + + mulDiv.io.resp.ready := !wbWxd + val longlatencyWdata: UInt = WireDefault(mulDiv.io.resp.bits.data) + val longlatencyWaddress: UInt = WireDefault(mulDiv.io.resp.bits.tag) + val longLatencyWenable: Bool = WireDefault(mulDiv.io.resp.fire) + + when(dmemResponseReplay && dmemResponseXpu) { + mulDiv.io.resp.ready := false.B + longlatencyWaddress := dmemResponseWaddr + longLatencyWenable := true.B + } + + val wbValid = wbRegValid && !replayWb && !wbException + val wbWen = wbValid && wbRegDecodeOutput(parameter.decoderParameter.wxd) + // RF is at WB stage + val rfWen = wbWen || longLatencyWenable + val rfWaddr = Mux(longLatencyWenable, longlatencyWaddress, wbWaddr) + val rfWdata = Mux( + dmemResponseValid && dmemResponseXpu, + io.dmem.resp.bits.data(xLen - 1, 0), + Mux( + longLatencyWenable, + longlatencyWdata, + Mux( + (wbRegDecodeOutput(parameter.decoderParameter.csr) =/= parameter.csrParameter.N) || Option.when(usingVector)(wbRegDecodeOutput(parameter.decoderParameter.vectorCSR)).getOrElse(false.B), + csr.io.rw.rdata, + Mux( + Option.when(usingMulDiv && pipelinedMul)(wbRegDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B), + mul.map(_.io.resp.bits.data).getOrElse(wbRegWdata), + wbRegWdata + ) + ) + ) + ) + when(rfWen) { rf.write(rfWaddr, rfWdata) } + + // hook up control/status regfile + csr.io.ungatedClock := io.clock + csr.io.decode(0).inst := idInstruction + csr.io.exception := wbException + csr.io.cause := wbCause + csr.io.retire := wbValid + csr.io.inst(0) := ( + if (usingCompressed) + Cat(Mux(wbRegRawInstruction(1, 0).andR, wbRegInstruction >> 16, 0.U), wbRegRawInstruction(15, 0)) + else wbRegInstruction + ) + csr.io.interrupts.tileInterrupts := io.interrupts + csr.io.interrupts.buserror.foreach( _ := io.buserror ) + csr.io.hartid := io.hartid + io.fpu.map { fpu => + fpu.fcsr_rm := csr.io.fcsrRm + csr.io.fcsrFlags := fpu.fcsr_flags + fpu.time := csr.io.time(31, 0) + fpu.hartid := io.hartid + }.getOrElse { + csr.io.fcsrFlags := DontCare + } + csr.io.pc := wbRegPc + val tvalDmemAddr = !wbRegException + val tvalAnyAddr = tvalDmemAddr || + isOneOf(wbRegCause, Seq( + Causes.breakpoint.U, + Causes.fetch_access.U, + Causes.fetch_page_fault.U, + Causes.fetch_guest_page_fault.U + )) + val tvalInstruction = wbRegCause === Causes.illegal_instruction.U + val tvalValid = wbException && (tvalAnyAddr || tvalInstruction) + csr.io.gva := wbException && (tvalAnyAddr && csr.io.status.v || tvalDmemAddr && wbRegHlsOrDv) + csr.io.tval := Mux(tvalValid, encodeVirtualAddress(wbRegWdata, wbRegWdata), 0.U) + 
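// htval below reports the guest-physical address of a guest-page fault, shifted right by hypervisorExtraAddrBits (2 when the hypervisor extension is enabled), matching the privileged spec's definition of htval as GPA >> 2; OR-ing the I-side and D-side sources assumes at most one of them is nonzero for a given fault. +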
csr.io.htval := { + val htvalValidImem = wbRegException && wbRegCause === Causes.fetch_guest_page_fault.U + val htvalImem = Mux(htvalValidImem, io.imem.gpa.bits, 0.U) + assert(!htvalValidImem || io.imem.gpa.valid) + + val htvalValidDmem = + wbException && tvalDmemAddr && io.dmem.s2_xcpt.gf.asUInt.orR && !io.dmem.s2_xcpt.pf.asUInt.orR + val htvalDmem = Mux(htvalValidDmem, io.dmem.s2_gpa, 0.U) + + (htvalDmem | htvalImem) >> hypervisorExtraAddrBits + } + io.ptw.ptbr := csr.io.ptbr + io.ptw.hgatp := csr.io.hgatp + io.ptw.vsatp := csr.io.vsatp +// io.ptw.customCSRs.csrs.zip(csr.io.customCSRs).foreach { case (lhs, rhs) => lhs <> rhs } + io.ptw.status := csr.io.status + io.ptw.hstatus := csr.io.hstatus + io.ptw.gstatus := csr.io.gstatus + io.ptw.pmp := csr.io.pmp + csr.io.rw.addr := wbRegInstruction(31, 20) + csr.io.rw.cmd := parameter.csrParameter.maskCmd(wbRegValid, wbRegDecodeOutput(parameter.decoderParameter.csr)) + csr.io.rw.wdata := wbRegWdata + csr.io.vectorCsr.foreach(_ := wbRegDecodeOutput(parameter.decoderParameter.vectorCSR)) + csr.io.wbRegRS2.foreach(_ := wbRegRS2) + + io.bpwatch.lazyZip(wbRegWphit).lazyZip(csr.io.bp).foreach { + case (iobpw, wphit, bp) => + iobpw.valid := wphit + iobpw.action := bp.control.action + // tie off bpwatch valids + iobpw.rvalid := false.B + iobpw.wvalid := false.B + iobpw.ivalid := false.B + } + + val hazardTargets = Seq( + (idDecodeOutput(parameter.decoderParameter.rxs1) && idRaddr1 =/= 0.U, idRaddr1), + (idDecodeOutput(parameter.decoderParameter.rxs2) && idRaddr2 =/= 0.U, idRaddr2), + (idDecodeOutput(parameter.decoderParameter.wxd) && idWaddr =/= 0.U, idWaddr) + ) + val fpHazardTargets = io.fpu.map(fpu => + Seq( + (fpu.dec.ren1, idRaddr1), + (fpu.dec.ren2, idRaddr2), + (fpu.dec.ren3, idRaddr3), + (fpu.dec.wen, idWaddr) + ) + ) + + val scoreboard: Scoreboard = new Scoreboard(32, true) + scoreboard.clear(longLatencyWenable, longlatencyWaddress) + def idScoreboardClearBypass(r: UInt): Bool = { + // ll_waddr arrives late when D$ has ECC, so reshuffle the hazard check + if (!hasDataECC) longLatencyWenable && longlatencyWaddress === r + else + mulDiv.io.resp.fire && mulDiv.io.resp.bits.tag === r || dmemResponseReplay && dmemResponseXpu && dmemResponseWaddr === r + } + val idScoreboardHazard: Bool = + checkHazards(hazardTargets, rd => scoreboard.read(rd) && !idScoreboardClearBypass(rd)) + scoreboard.set(wbSetSboard && wbWen, wbWaddr) + + // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
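+ // Stall-vs-bypass rule of thumb for the checks below: a producer whose result is ready at the end of EX (a plain ALU op) is resolved by the bypass network, so only producers listed in exCannotBypass force an ID stall. Simplified shape (illustrative, see idExHazard below): exRegValid && dataHazardEx && exCannotBypass.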
+ val exCannotBypass: Bool = + exRegDecodeOutput(parameter.decoderParameter.csr) =/= parameter.csrParameter.N || + exRegDecodeOutput(parameter.decoderParameter.isJalr) || + exRegDecodeOutput(parameter.decoderParameter.mem) || + Option.when(usingMulDiv && pipelinedMul)(exRegDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B) || + Option.when(usingMulDiv)(exRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) || + Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B) + val dataHazardEx: Bool = exRegDecodeOutput(parameter.decoderParameter.wxd) && checkHazards(hazardTargets, _ === exWaddr) + val fpDataHazardEx: Option[Bool] = fpHazardTargets.map(fpHazardTargets => + idDecodeOutput(parameter.decoderParameter.fp) && exRegDecodeOutput(parameter.decoderParameter.wfd) && checkHazards(fpHazardTargets, _ === exWaddr) + ) + val idExHazard: Bool = exRegValid && (dataHazardEx && exCannotBypass || fpDataHazardEx.getOrElse(false.B)) + + // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. + // TODO: what's BH? + val memMemCmdBh: Bool = + if (fastLoadWord) (!fastLoadByte).B && memRegSlowBypass + else true.B + val memCannotBypass: Bool = + memRegDecodeOutput(parameter.decoderParameter.csr) =/= parameter.csrParameter.N || + memRegDecodeOutput(parameter.decoderParameter.mem) && memMemCmdBh || + Option.when(usingMulDiv && pipelinedMul)(memRegDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B) || + Option.when(usingMulDiv)(memRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) || + Option.when(usingFPU)(memRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B) + val dataHazardMem: Bool = memRegDecodeOutput(parameter.decoderParameter.wxd) && checkHazards(hazardTargets, _ === memWaddr) + val fpDataHazardMem: Option[Bool] = fpHazardTargets.map(fpHazardTargets => + idDecodeOutput(parameter.decoderParameter.fp) && + memRegDecodeOutput(parameter.decoderParameter.wfd) && + checkHazards(fpHazardTargets, _ === memWaddr) + ) + val idMemHazard: Bool = memRegValid && (dataHazardMem && memCannotBypass || fpDataHazardMem.getOrElse(false.B)) + idLoadUse := memRegValid && dataHazardMem && memRegDecodeOutput(parameter.decoderParameter.mem) + // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. + val dataHazardWb: Bool = wbRegDecodeOutput(parameter.decoderParameter.wxd) && checkHazards(hazardTargets, _ === wbWaddr) + val fpDataHazardWb: Bool = fpHazardTargets + .map(fpHazardTargets => + idDecodeOutput(parameter.decoderParameter.fp) && + wbRegDecodeOutput(parameter.decoderParameter.wfd) && + checkHazards(fpHazardTargets, _ === wbWaddr) + ) + .getOrElse(false.B) + val idWbHazard: Bool = wbRegValid && (dataHazardWb && wbSetSboard || fpDataHazardWb) + val idStallFpu: Bool = + io.fpu + .zip(fpHazardTargets) + .map { + case (fpu, fpHazardTargets) => + val fpScoreboard = new Scoreboard(32) + // 8. 
set FP scoreboard + fpScoreboard.set(((wbDcacheMiss || Option.when(usingVector)(wbRegDecodeOutput(parameter.decoderParameter.vector)).getOrElse(false.B)) && wbRegDecodeOutput(parameter.decoderParameter.wfd) || fpu.sboard_set) && wbValid, wbWaddr) + fpScoreboard.clear(dmemResponseReplay && dmemResponseFpu, dmemResponseWaddr) + io.t1.foreach(t1 => fpScoreboard.clear(t1.retire.rd.valid && t1.retire.rd.bits.isFp, t1.retire.rd.bits.rdAddress)) + fpScoreboard.clear(fpu.sboard_clr, fpu.sboard_clra) + checkHazards(fpHazardTargets, fpScoreboard.read) + } + .getOrElse(false.B) + + val dcacheBlocked: Bool = { + // speculate that a blocked D$ will unblock the cycle after a Grant + val blocked = Reg(Bool()) + blocked := !io.dmem.req.ready && io.dmem.clock_enabled && !io.dmem.perf.grant && (blocked || io.dmem.req.valid || io.dmem.s2_nack) + blocked && !io.dmem.perf.grant + } + + // vector stall + val vectorLSUEmpty: Option[Bool] = Option.when(usingVector)(Wire(Bool())) + val vectorQueueFull: Option[Bool] = Option.when(usingVector)(Wire(Bool())) + val vectorStall: Option[Bool] = Option.when(usingVector) { + val vectorLSUNotClear = + (exRegValid && exRegDecodeOutput(parameter.decoderParameter.vectorLSU)) || + (memRegValid && memRegDecodeOutput(parameter.decoderParameter.vectorLSU)) || + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)) || + !vectorLSUEmpty.get + // Vector instruction queue is full + // TODO: need cover. + (idDecodeOutput(parameter.decoderParameter.vector) && vectorQueueFull.get) || + // There is an outstanding LSU. + (idDecodeOutput(parameter.decoderParameter.mem) && !idDecodeOutput(parameter.decoderParameter.vector) && vectorLSUNotClear) + } + + // TODO: vector stall + val ctrlStalld: Bool = + idExHazard || idMemHazard || idWbHazard || idScoreboardHazard || idDoFence || idRegPause || + csr.io.csrStall || csr.io.singleStep && (exRegValid || memRegValid || wbRegValid) || + idCsrEn && csr.io.decode(0).fpCsr && !io.fpu.map(_.fcsr_rdy).getOrElse(false.B) || io.traceStall || + !clockEnable || + Option.when(usingFPU)(idDecodeOutput(parameter.decoderParameter.fp) && idStallFpu).getOrElse(false.B) || + idDecodeOutput(parameter.decoderParameter.mem) && dcacheBlocked || // reduce activity during D$ misses + Option + .when(usingMulDiv)( + idDecodeOutput( + parameter.decoderParameter.div + ) && (!(mulDiv.io.req.ready || (mulDiv.io.resp.valid && !wbWxd)) || mulDiv.io.req.valid) + ) + .getOrElse(false.B) || // reduce odds of replay + // TODO: vectorStall is large, we may need it to gate the scalar core. 
+ vectorStall.getOrElse(false.B) + + ctrlKilled := + // IBUF not bubble + !instructionBuffer.io.inst(0).valid || + // Miss + instructionBufferOut.bits.replay || + // flush + takePcMemWb || + // + ctrlStalld || + csr.io.interrupt + + io.imem.req.valid := takePc + io.imem.req.bits.speculative := !takePcWb + // flush or branch misprediction + io.imem.req.bits.pc := Mux( + wbException || csr.io.eret, + csr.io.evec, // exception or [m|s]ret + Mux( + replayWb, + wbRegPc, // replay + memNextPC + ) + ) + io.imem.flush_icache := wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.fenceI) && !io.dmem.s2_nack + io.imem.might_request := { + imemMightRequestReg := exPcValid || memPcValid /*|| io.ptw.customCSRs.disableICacheClockGate*/ + imemMightRequestReg + } + io.imem.progress := RegNext(wbRegValid && !replayWbCommon) + io.imem.sfence.valid := wbRegValid && wbRegSfence + io.imem.sfence.bits.rs1 := wbRegMemSize(0) + io.imem.sfence.bits.rs2 := wbRegMemSize(1) + io.imem.sfence.bits.addr := wbRegWdata + io.imem.sfence.bits.asid := wbRegRS2 + io.imem.sfence.bits.hv := wbRegHfenceV + io.imem.sfence.bits.hg := wbRegHfenceG + io.ptw.sfence := io.imem.sfence + + instructionBufferOut.ready := !ctrlStalld + + io.imem.btb_update.valid := memRegValid && !takePcWb && memWrongNpc && (!memCfi || memCfiTaken) + io.imem.btb_update.bits.isValid := memCfi + io.imem.btb_update.bits.cfiType := + Mux( + (memRegDecodeOutput(parameter.decoderParameter.isJal) || memRegDecodeOutput(parameter.decoderParameter.isJalr)) && memWaddr(0), + CFIType.call, + Mux( + memRegDecodeOutput(parameter.decoderParameter.isJalr) && (memRegInstruction(19, 15) & regAddrMask.U) === BitPat("b00?01"), + CFIType.ret, + Mux(memRegDecodeOutput(parameter.decoderParameter.isJal) || memRegDecodeOutput(parameter.decoderParameter.isJalr), CFIType.jump, CFIType.branch) + ) + ) + io.imem.btb_update.bits.target := io.imem.req.bits.pc + io.imem.btb_update.bits.br_pc := (if (usingCompressed) memRegPc + Mux(memRegRVC, 0.U, 2.U) else memRegPc) + io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes * fetchWidth - 1).U) + io.imem.btb_update.bits.prediction := memRegBTBResponse + io.imem.btb_update.bits.taken := DontCare + + io.imem.bht_update.valid := memRegValid && !takePcWb + io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc + io.imem.bht_update.bits.taken := memBranchTaken + io.imem.bht_update.bits.mispredict := memWrongNpc + io.imem.bht_update.bits.branch := memRegDecodeOutput(parameter.decoderParameter.isBranch) + io.imem.bht_update.bits.prediction := memRegBTBResponse.bht + + // Connect RAS in Frontend + io.imem.ras_update := DontCare + + io.fpu.foreach { fpu => + fpu.valid := !ctrlKilled && idDecodeOutput(parameter.decoderParameter.fp) + fpu.killx := ctrlKillx + fpu.killm := killmCommon + fpu.inst := idInstruction + fpu.fromint_data := exRs(0) + fpu.dmem_resp_val := dmemResponseValid && dmemResponseFpu + fpu.dmem_resp_data := (if (minFLen == 32) io.dmem.resp.bits.data_word_bypass else io.dmem.resp.bits.data) + fpu.dmem_resp_type := io.dmem.resp.bits.size + fpu.dmem_resp_tag := dmemResponseWaddr +// fpu.keep_clock_enabled := io.ptw.customCSRs.disableCoreClockGate + fpu.keep_clock_enabled := false.B + } + + // TODO: T1 only logic + io.t1.foreach { t1 => + // T1 Issue + val maxCount: Int = 32 + val t1IssueQueue = Module(new Queue(chiselTypeOf(t1.issue.bits), maxCount)) + t1IssueQueue.io.enq.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector) + t1IssueQueue.io.enq.bits.instruction 
:= wbRegInstruction + t1IssueQueue.io.enq.bits.rs1Data := wbRegWdata + t1IssueQueue.io.enq.bits.rs2Data := wbRegRS2 + t1IssueQueue.io.enq.bits.vtype := csr.io.csrToVector.get.vtype + t1IssueQueue.io.enq.bits.vl := csr.io.csrToVector.get.vl + t1IssueQueue.io.enq.bits.vstart := csr.io.csrToVector.get.vstart + t1IssueQueue.io.enq.bits.vcsr := csr.io.csrToVector.get.vcsr + t1.issue.valid := t1IssueQueue.io.deq.valid + t1.issue.bits := t1IssueQueue.io.deq.bits + t1IssueQueue.io.deq.ready := t1.issue.ready + // Each kind of retirement maintains its own scoreboard. + val t1CSRRetireQueue: Queue[T1CSRRetire] = Module(new Queue(chiselTypeOf(t1.retire.csr.bits), maxCount)) + val t1XRDRetireQueue: Queue[T1RdRetire] = Module(new Queue(chiselTypeOf(t1.retire.rd.bits), maxCount)) + + val countWidth = log2Up(maxCount) + def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { + val counter: UInt = RegInit(0.U(size.W)) + val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt) + val updateCounter = grant ^ release + when(updateCounter) { + counter := nextCount + } + flush.foreach(f => when(f)(counter := 0.U)) + val empty = (updateCounter && nextCount === 0.U) || counter === 0.U + val fullCounter: Int = (1 << size) - 1 - margin + val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U + (empty, full) + } + // T1 Memory Scoreboard + val t1MemoryGrant: Bool = t1IssueQueue.io.enq.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU) + val t1MemoryRelease: Bool = t1.retire.mem.fire + // todo: handle vector lsu in pipe + // +1: There are instructions that will enter t1 + val (lsuEmpty, _) = counterManagement(countWidth + 1)(t1MemoryGrant, t1MemoryRelease) + // T1 CSR Scoreboard + // todo: add wbRegDecodeOutput(vectorWriteCsr) + val t1CSRGrant: Bool = false.B + val t1CSRRelease: Bool = false.B // t1CSRRetireQueue.io.deq.fire + val (t1CSREmpty, _) = counterManagement(countWidth + 1)(t1CSRGrant, t1CSRRelease) + // T1 XRD Scoreboard?
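+ // How counterManagement works: grant increments, release decrements, and a simultaneous grant/release cancels out (grant ^ release), so the counter holds; empty and full also examine nextCount, making them visible in the update cycle itself. Worked example: the LSU scoreboard uses size = countWidth + 1 = 6 with margin = 0, so full would assert at (1 << 6) - 1 = 63; the vector counter below uses size = 5 with margin = 4, asserting full at 31 - 4 = 27 to leave slack for up to four instructions already in the pipe.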
+ + // Maintain vector counter + // There may be 4 instructions in the pipe + val (_, vectorFull) = counterManagement(countWidth, 4)(t1IssueQueue.io.enq.valid, t1.issue.fire) + vectorLSUEmpty.foreach(_ := lsuEmpty) + vectorQueueFull.foreach(_ := vectorFull) + + t1XRDRetireQueue.io.enq.valid := t1.retire.rd.valid + t1XRDRetireQueue.io.enq.bits := t1.retire.rd.bits + t1CSRRetireQueue.io.enq.valid := t1.retire.csr.valid + t1CSRRetireQueue.io.enq.bits := t1.retire.csr.bits + // todo: write csr here + t1CSRRetireQueue.io.deq.ready := true.B + + val vectorTryToWriteRd = t1XRDRetireQueue.io.deq.valid && !t1XRDRetireQueue.io.deq.bits.isFp + val vectorTryToWriteFP = t1XRDRetireQueue.io.deq.valid && t1XRDRetireQueue.io.deq.bits.isFp + t1XRDRetireQueue.io.deq.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP) + + when(t1.retire.rd.fire && vectorTryToWriteRd) { + longlatencyWdata := t1.retire.rd.bits.rdData + longlatencyWaddress := t1.retire.rd.bits.rdAddress + longLatencyWenable := true.B + } + io.fpu.foreach { fpu => + when(!(dmemResponseValid && dmemResponseFpu)) { + fpu.dmem_resp_val := t1.retire.mem.fire && vectorTryToWriteFP + fpu.dmem_resp_data := t1.retire.rd.bits.rdData + // todo: 32 bit only + fpu.dmem_resp_type := 2.U + // todo: connect tag + fpu.dmem_resp_tag := 0.U + } + } + } + + io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem) + val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B)) +// require(coreParams.dcacheReqTagBits >= ex_dcache_tag.getWidth) + io.dmem.req.bits.tag := ex_dcache_tag + io.dmem.req.bits.cmd := exRegDecodeOutput(parameter.decoderParameter.memCommand) + io.dmem.req.bits.size := exRegMemSize + io.dmem.req.bits.signed := !Mux(exRegHLS, exRegInstruction(20), exRegInstruction(14)) + io.dmem.req.bits.phys := false.B + io.dmem.req.bits.addr := encodeVirtualAddress(exRs(0), alu.io.adder_out) + io.dmem.req.bits.idx.foreach(_ := io.dmem.req.bits.addr) + io.dmem.req.bits.dprv := Mux(exRegHLS, csr.io.hstatus.spvp, csr.io.status.dprv) + io.dmem.req.bits.dv := exRegHLS || csr.io.status.dv + io.dmem.req.bits.no_alloc := DontCare + io.dmem.req.bits.no_xcpt := DontCare + io.dmem.req.bits.data := DontCare + io.dmem.req.bits.mask := DontCare + io.dmem.s1_data.data := io.fpu + .map(fpu => Mux(memRegDecodeOutput(parameter.decoderParameter.fp), Fill(xLen.max(fLen.get) / fLen.get, fpu.store_data), memRegRS2)) + .getOrElse(memRegRS2) + io.dmem.s1_data.mask := DontCare + + io.dmem.s1_kill := killmCommon || memLoadStoreException || fpuKillMem.getOrElse(false.B) + io.dmem.s2_kill := false.B + // don't let D$ go to sleep if we're probably going to use it soon + io.dmem.keep_clock_enabled := instructionBufferOut.valid && idDecodeOutput(parameter.decoderParameter.mem) && !csr.io.csrStall + + // gate the clock + val unpause: Bool = + csr.io.time(rocketParams.lgPauseCycles - 1, 0) === 0.U || csr.io.inhibitCycle || io.dmem.perf.release || takePc + when(unpause) { idRegPause := false.B } + io.cease := csr.io.status.cease && !clockEnableReg + io.wfi := csr.io.status.wfi + if (rocketParams.clockGate) { + longLatencyStall := csr.io.csrStall || io.dmem.perf.blocked || idRegPause && !unpause + clockEnable := clockEnableReg || exPcValid || (!longLatencyStall && io.imem.resp.valid) + clockEnableReg := + exPcValid || memPcValid || wbPcValid || // instruction in flight +// 
io.ptw.customCSRs.disableCoreClockGate || // chicken bit + !mulDiv.io.req.ready || // mul/div in flight + io.fpu.map(!_.fcsr_rdy).getOrElse(false.B) || // long-latency FPU in flight + io.dmem.replay_next || // long-latency load replaying + (!longLatencyStall && (instructionBufferOut.valid || io.imem.resp.valid)) // instruction pending + + assert(!(exPcValid || memPcValid || wbPcValid) || clockEnable) + } + + // evaluate performance counters + val icacheBlocked = !(io.imem.resp.valid || RegNext(io.imem.resp.valid)) + // todo: perfEvents here. +// csr.io.counters.foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) } + + // probe xrf write + val probeWire = Wire(new RocketProbe(parameter)) + define(io.rocketProbe, ProbeValue(probeWire)) + probeWire.rfWen := rfWen + probeWire.rfWaddr := rfWaddr + probeWire.rfWdata := rfWdata + } + + def checkExceptions(x: Seq[(Bool, UInt)]) = + (x.map(_._1).reduce(_ || _), PriorityMux(x)) + + def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = + targets.map(h => h._1 && cond(h._2)).reduce(_ || _) + + def encodeVirtualAddress(a0: UInt, ea: UInt) = if (vaddrBitsExtended == vaddrBits) ea + else { + // efficient means to compress 64-bit VA into vaddrBits+1 bits + // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) + val b = vaddrBitsExtended - 1 + val a = (a0 >> b).asSInt + val msb = Mux(a === 0.S || a === -1.S, ea(b), !ea(b - 1)) + Cat(msb, ea(b - 1, 0)) + } + + class Scoreboard(n: Int, zero: Boolean = false) { + def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) + def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) + def read(addr: UInt): Bool = r(addr) + def readBypassed(addr: UInt): Bool = _next(addr) + + private val _r = RegInit(0.U(n.W)) + private val r = if (zero) (_r >> 1 << 1) else _r + private var _next = r + private var ens = false.B + private def mask(en: Bool, addr: UInt) = Mux(en, 1.U << addr, 0.U) + private def update(en: Bool, update: UInt) = { + _next = update + ens = ens || en + when(ens) { _r := _next } + } + } + +} + +class RegFile(n: Int, w: Int, zero: Boolean = false) { + val rf: Mem[UInt] = Mem(n, UInt(w.W)) + private def access(addr: UInt): UInt = rf(~addr(log2Ceil(n) - 1, 0)) + private val reads = collection.mutable.ArrayBuffer[(UInt, UInt)]() + private var canRead = true + def read(addr: UInt) = { + require(canRead) + reads += addr -> Wire(UInt()) + reads.last._2 := Mux(zero.B && addr === 0.U, 0.U, access(addr)) + reads.last._2 + } + def write(addr: UInt, data: UInt) = { + canRead = false + when(addr =/= 0.U) { + access(addr) := data + for ((raddr, rdata) <- reads) + when(addr === raddr) { rdata := data } + } + } +} diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala new file mode 100644 index 000000000..33c2082d7 --- /dev/null +++ b/rocketv/src/RocketTile.scala @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.probe.{Probe, define} +import chisel3.util.experimental.BitSet +import chisel3.util.log2Ceil +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} +import org.chipsalliance.rvdecoderdb.Instruction + +object RocketTileParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + 
.bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit def rwP: upickle.default.ReadWriter[RocketTileParameter] = upickle.default.macroRW[RocketTileParameter] +} + +/** + * Core: + * isa: parse from isa string + * vlen: parse from isa string, e.g. rv32imfd_zvl64b_zve32f + * priv: m|s|u + * + * Memory: + * AXI width + * PMA config + * + * uarch: + * - clockGate: sync + * - hartIdLen: log2 hart size, 1 + * - fenceIFlushDCache: flush DCache on fence.i: true + * - nPMPs: pmp region size, 8 + * - asidBits: ASID length, 0 + * - nBreakpoints: todo, 0 + * - useBPWatch: todo, false + * - mcontextWidth: todo, 0 + * - scontextWidth: todo, 0 + * - hasBeu: has bus error unit, false + * + * - fastLoadByte: todo, true + * - fastLoadWord: todo, false + * - if (fastLoadByte) io.dmem.resp.bits.data(xLen-1, 0) + * - else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass(xLen-1, 0) + * - else wb_reg_wdata + * + * - mulDivLatency: + * - divUnroll: + * - divEarlyOut: + * - divEarlyOutGranularity: + * - mulUnroll: + * - mulEarlyOut: + * + * - itlbNSets: ??? + * - itlbNWays: ??? + * - itlbNSectors: ??? + * - itlbNSuperpageEntries: ??? + * + * - usingBTB: + * - btbEntries: 28 + * - btbNMatchBits: 14 + * - btbUpdatesOutOfOrder: false + * - nPages: 6 + * - nRAS: 6 + * - usingBHT: + * - nEntries: 512 + * - counterLength: 1 + * - historyLength: 8 + * - historyBits: 3 + * + * - icache/dcache size: 16K, 32K + * - cacheBlockBytes: 32 + * - cache way: 4 + * - cache banksize: 32 + * - iCachePrefetch: false, todo, AXI Hint. + */ +case class RocketTileParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + instructionSets: Set[String], + priv: String, + hartIdLen: Int, + useBPWatch: Boolean, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + resetVectorBits: Int, + nBreakpoints: Int, + dtlbNWays: Int, + dtlbNSets: Int, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + paddrBits: Int, + cacheBlockBytes: Int, + nPMPs: Int, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + bhtParameter: Option[BHTParameter], + mulDivLatency: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + flushOnFenceI: Boolean, + fastLoadByte: Boolean, + fastLoadWord: Boolean, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean) + extends SerializableModuleParameter { + + // calculate + def usingUser: Boolean = priv.contains("u") + + def usingSupervisor: Boolean = priv.contains("s") + + def vLen: Option[Int] = instructionSets.collectFirst { + case s"zvl${vlen}b" => vlen.toInt + } + + // static for now + def hasBeu: Boolean = false + def usingNMI: Boolean = false + def usingHypervisor: Boolean = false + def usingDataScratchpad: Boolean = false + def nLocalInterrupts: Int = 0 + def dcacheArbPorts: Int = 2 + def tagECC: Option[String] = None + def dataECC: Option[String] = None + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def instructions: 
Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // Four mandatory instruction sets. + Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) + private def hasInstructionSet(setName: String): Boolean = + instructions.flatMap(_.instructionSets.map(_.name)).contains(setName) + def usingBTB: Boolean = btbEntries > 0 + def xLen: Int = + (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match { + case (true, true) => throw new Exception("cannot support both rv32 and rv64 together") + case (true, false) => 32 + case (false, true) => 64 + case (false, false) => throw new Exception("no basic instruction found.") + } + def fLen: Option[Int] = + ( + hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"), + hasInstructionSet("rv_d") || hasInstructionSet("rv64_d") + ) match { + case (false, false) => None + case (true, false) => Some(32) + case (false, true) => Some(64) + case (true, true) => Some(64) + } + + def usingVM = hasInstructionSet("sfence.vma") + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a") + + def usingCompressed = hasInstructionSet("rv_c") + + def minFLen: Option[Int] = + if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh")) + Some(16) + else + fLen + + def rocketParameter: RocketParameter = RocketParameter( + useAsyncReset, + clockGate, + instructionSets, + vLen.getOrElse(0), + usingUser, + hartIdLen, + nPMPs, + asidBits, + nBreakpoints, + usingBTB, + useBPWatch, + mcontextWidth, + scontextWidth, + mulDivLatency, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut, + paddrBits, + cacheBlockBytes, + hasBeu, + fastLoadByte, + fastLoadWord, + dcacheNSets, + flushOnFenceI, + usingT1 = false + ) + + def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + usingVM: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNWays: Int, + dcacheNSets: Int, + dcacheRowBits: Int, + dtlbNSets: Int, + dtlbNWays: Int, + tagECC: Option[String], + dataECC: Option[String], + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) + + def hellaCacheArbiterParameter: HellaCacheArbiterParameter = HellaCacheArbiterParameter( + useAsyncReset: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNSets: Int, + usingVM: Boolean, + separateUncachedResp: Boolean + ) + + def ptwParameter: PTWParameter = PTWParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + asidBits: Int, + pgLevels: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + nPMPs: Int + ) + + def frontendParameter: FrontendParameter = FrontendParameter( + 
useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingBTB: Boolean, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + cacheBlockBytes: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + nPMPs: Int, + paddrBits: Int, + pgLevels: Int, + asidBits: Int, + bhtParameter: Option[BHTParameter], + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) + + def fpuParameter: Option[FPUParameter] = fLen.zip(minFLen).map { + case (fLen, minFLen) => + FPUParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + hartIdLen: Int + ) + } + + def instructionFetchParameter: AXI4BundleParameter = frontendParameter.instructionFetchParameter + + def itimParameter: Option[AXI4BundleParameter] = frontendParameter.itimParameter + + def loadStoreParameter: AXI4BundleParameter = hellaCacheParameter.loadStoreParameter + + def dtimParameter: Option[AXI4BundleParameter] = hellaCacheParameter.dtimParameter +} + +class RocketTileInterface(parameter: RocketTileParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + + val hartid = Flipped(UInt(parameter.hartIdLen.W)) + val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) + + val debug: Bool = Input(Bool()) + val mtip: Bool = Input(Bool()) + val msip: Bool = Input(Bool()) + val meip: Bool = Input(Bool()) + val seip: Option[Bool] = Option.when(parameter.usingSupervisor)(Bool()) + val lip: Vec[Bool] = Vec(parameter.nLocalInterrupts, Bool()) + val nmi = Option.when(parameter.usingNMI)(Bool()) + val nmiInterruptVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + val nmiExceptionVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + // TODO: buserror should be handled by NMI + val buserror: Bool = Input(Bool()) + val wfi: Bool = Output(Bool()) + val halt: Bool = Output(Bool()) + + val instructionFetchAXI: AXI4ROIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + val itimAXI: Option[AXI4RWIrrevocable] = + parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val loadStoreAXI: AXI4RWIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) + val dtimAXI: Option[AXI4RWIrrevocable] = + parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val rocketProbe = Output(Probe(new RocketProbe(parameter.rocketParameter))) +} + +class RocketTile(val parameter: RocketTileParameter) + extends FixedIORawModule(new RocketTileInterface(parameter)) + with SerializableModule[RocketTileParameter] { + val rocket: Instance[Rocket] = Instantiate(new Rocket(parameter.rocketParameter)) + val frontend: Instance[Frontend] = Instantiate(new Frontend(parameter.frontendParameter)) + val hellaCache: Instance[HellaCache] = Instantiate(new HellaCache(parameter.hellaCacheParameter)) + val hellaCacheArbiter: Instance[HellaCacheArbiter] = Instantiate( + new 
HellaCacheArbiter(parameter.hellaCacheArbiterParameter) + ) + val ptw: Instance[PTW] = Instantiate(new PTW(parameter.ptwParameter)) + val fpu: Option[Instance[FPU]] = parameter.fpuParameter.map(fpuParameter => Instantiate(new FPU(fpuParameter))) + + rocket.io.clock := io.clock + rocket.io.reset := io.reset + rocket.io.hartid := io.hartid + rocket.io.interrupts.debug := io.debug + rocket.io.interrupts.mtip := io.mtip + rocket.io.interrupts.msip := io.msip + rocket.io.interrupts.meip := io.meip + rocket.io.interrupts.seip.foreach(_ := io.seip.get) + rocket.io.interrupts.lip := io.lip + rocket.io.interrupts.nmi.foreach { nmi => + nmi.rnmi := io.nmi.get + nmi.rnmi_interrupt_vector := io.nmiInterruptVector.get + nmi.rnmi_exception_vector := io.nmiExceptionVector.get + } + // @todo make it optional + rocket.io.buserror := io.buserror + io.wfi := rocket.io.wfi + io.loadStoreAXI <> hellaCache.io.loadStoreAXI + io.dtimAXI.zip(hellaCache.io.dtimAXI).foreach { case (io, hellaCache) => io <> hellaCache } + io.instructionFetchAXI <> frontend.io.instructionFetchAXI + io.itimAXI.zip(frontend.io.itimAXI).foreach { case (io, frontend) => io <> frontend } + // designed for halt and BEU; only the halt function is used for now. + io.halt := Seq(frontend.io.nonDiplomatic.errors.uncorrectable, hellaCache.io.errors.uncorrectable) + .flatMap(_.map(_.valid)) + .foldLeft(false.B)(_ || _) + + // rocket core io + rocket.io.imem <> frontend.io.nonDiplomatic.cpu + hellaCacheArbiter.io.requestor(0) <> rocket.io.dmem + rocket.io.ptw <> ptw.io.dpath + rocket.io.fpu.zip(fpu.map(_.io.core)).foreach { case (core, fpu) => core <> fpu } + // used by trace module + rocket.io.bpwatch := DontCare + // not used for now; this is designed to report the custom cease status. + // rocket.io.cease + // it will be used in the future w/ trace support. + rocket.io.traceStall := false.B + + // frontend io + frontend.io.clock := io.clock + frontend.io.reset := io.reset + frontend.io.resetVector := io.resetVector + ptw.io.requestor(0) <> frontend.io.nonDiplomatic.ptw + + // hellacache io + hellaCache.io.clock := io.clock + hellaCache.io.reset := io.reset + ptw.io.requestor(1) <> hellaCache.io.ptw + hellaCache.io.cpu <> hellaCacheArbiter.io.mem + + // ptw io + ptw.io.clock := io.clock + ptw.io.reset := io.reset + hellaCacheArbiter.io.requestor(1) <> ptw.io.mem + + // hellacache arbiter io + hellaCacheArbiter.io.clock := io.clock + hellaCacheArbiter.io.reset := io.reset + + fpu.foreach { fpu => + fpu.io.clock := io.clock + fpu.io.reset := io.reset + // @todo: remove it from FPU. 
+ fpu.io.cp_req <> DontCare + fpu.io.cp_resp <> DontCare + } + + // probe + define(io.rocketProbe, rocket.io.rocketProbe) +} diff --git a/rocketv/src/TLB.scala b/rocketv/src/TLB.scala new file mode 100644 index 000000000..3ea02f4f9 --- /dev/null +++ b/rocketv/src/TLB.scala @@ -0,0 +1,678 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, Decoupled, Enum, Fill, Mux1H, OHToUInt, PopCount, PriorityEncoder, UIntToOH, Valid, log2Ceil} + +object TLBParameter { + implicit def rwP: upickle.default.ReadWriter[TLBParameter] = upickle.default.macroRW[TLBParameter] +} + +case class TLBParameter( + useAsyncReset: Boolean, + xLen: Int, + nSets: Int, + nWays: Int, + nSectors: Int, + nSuperpageEntries: Int, + asidBits: Int, + pgLevels: Int, + usingHypervisor: Boolean, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingAtomicsOnlyForIO: Boolean, + usingVM: Boolean, + usingAtomicsInCache: Boolean, + nPMPs: Int, + pmaCheckerParameter: PMACheckerParameter, + paddrBits: Int, + isITLB: Boolean + ) extends SerializableModuleParameter { + require(nWays > nSectors, s"nWays: ${nWays} > nSectors: ${nSectors}") + // D$: log2Ceil(coreDataBytes), I$: log2Ceil(fetchBytes) + def lgMaxSize = log2Ceil(xLen / 8) + + def pmpCheckerParameter: PMPCheckerParameter = PMPCheckerParameter(nPMPs, paddrBits, lgMaxSize, pmpGranularity) + + def vpnBits: Int = vaddrBits - pgIdxBits + + def ppnBits: Int = paddrBits - pgIdxBits + + private def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + (if (usingHypervisor) 1 else 0) else 0) + + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1).min(xLen) + } + + def minPgLevels: Int = { + val res = xLen match { + case 32 => 2 + case 64 => 3 + } + require(pgLevels >= res) + res + } + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def maxHypervisorExtraAddrBits: Int = 2 + + def hypervisorExtraAddrBits: Int = { + if (usingHypervisor) maxHypervisorExtraAddrBits + else 0 + } + + def maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } + + def pgIdxBits: Int = 12 + + def pmpGranularity: Int = if (usingHypervisor) 4096 else 4 +} + +class TLBInterface(parameter: TLBParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + + /** request from Core */ + val req = Flipped(Decoupled(new TLBReq(parameter.lgMaxSize, parameter.vaddrBitsExtended))) + + /** response to Core */ + val resp = Output(new TLBResp(parameter.paddrBits, parameter.vaddrBitsExtended)) + + /** SFence Input */ + val sfence = Flipped(Valid(new SFenceReq(parameter.vaddrBits, parameter.asidBits))) + + /** IO to PTW */ + val ptw = new TLBPTWIO( + parameter.nPMPs, + parameter.vpnBits, + parameter.paddrBits, + 
parameter.vaddrBits, + parameter.pgLevels, + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits + ) + + /** suppress a TLB refill, one cycle after a miss */ + val kill = Input(Bool()) +} + +@instantiable +class TLB(val parameter: TLBParameter) + extends FixedIORawModule(new TLBInterface(parameter)) + with SerializableModule[TLBParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val pmpGranularity = parameter.pmpGranularity + val vaddrBits = parameter.vaddrBits + val vaddrBitsExtended = parameter.vaddrBitsExtended + val pgIdxBits = parameter.pgIdxBits + val pgLevels = parameter.pgLevels + val minPgLevels = parameter.minPgLevels + val pgLevelBits = parameter.pgLevelBits + val hypervisorExtraAddrBits = parameter.hypervisorExtraAddrBits + val vpnBits = parameter.vpnBits + val ppnBits = parameter.ppnBits + val usingHypervisor = parameter.usingHypervisor + val usingAtomics = parameter.usingAtomics + val usingVM = parameter.usingVM + val usingDataScratchpad = parameter.usingDataScratchpad + val usingAtomicsOnlyForIO = parameter.usingAtomicsOnlyForIO + val instruction = parameter.isITLB + val usingAtomicsInCache = parameter.usingAtomicsInCache + val lgMaxSize = parameter.lgMaxSize + def M_XLR = "b00110".U + def M_XSC = "b00111".U + + def M_XA_SWAP = "b00100".U + def M_XA_XOR = "b01001".U + def M_XA_OR = "b01010".U + def M_XA_AND = "b01011".U + def M_XA_ADD = "b01000".U + def M_XA_MIN = "b01100".U + def M_XA_MAX = "b01101".U + def M_XA_MINU = "b01110".U + def M_XA_MAXU = "b01111".U + def M_PWR = "b10001".U // partial (masked) store + def M_XRD = "b00000".U; // int load + def M_HLVX = "b10000".U // HLVX instruction + def M_XWR = "b00001".U; // int store + def M_FLUSH_ALL = "b00101".U + def M_WOK = "b10111".U // check write permissions but don't perform a write + + // compatibility mode + object cfg { + val nSets: Int = parameter.nSets + val nWays: Int = parameter.nWays + val nSectors: Int = parameter.nSectors + val nSuperpageEntries: Int = parameter.nSuperpageEntries + } + object PopCountAtLeast { + private def two(x: UInt): (Bool, Bool) = x.getWidth match { + case 1 => (x.asBool, false.B) + case n => + val half = x.getWidth / 2 + val (leftOne, leftTwo) = two(x(half - 1, 0)) + val (rightOne, rightTwo) = two(x(x.getWidth - 1, half)) + (leftOne || rightOne, leftTwo || rightTwo || (leftOne && rightOne)) + } + def apply(x: UInt, n: Int): Bool = n match { + case 0 => true.B + case 1 => x.orR + case 2 => two(x)._2 + case 3 => PopCount(x) >= n.U + } + } + + // end + + val pmp: Instance[PMPChecker] = Instantiate(new PMPChecker(parameter.pmpCheckerParameter)) + + // io.ptw.customCSRs := DontCare + + val pageGranularityPMPs = pmpGranularity >= (1 << parameter.pgIdxBits) + val vpn = io.req.bits.vaddr(vaddrBits - 1, pgIdxBits) + + /** index for sectored_Entry */ + val memIdx = if (log2Ceil(cfg.nSets) == 0) 0.U else vpn(log2Ceil(cfg.nSectors) + log2Ceil(cfg.nSets) - 1, log2Ceil(cfg.nSectors)) + + /** TLB Entry */ + // val superpage: Boolean = false, val superpageOnly: Boolean = false + val sectored_entries = Reg( + Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, pgLevels, vpnBits, ppnBits))) + ) + + /** Superpage Entry */ + // val superpage: Boolean = true, val superpageOnly: Boolean = true + val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, pgLevels, vpnBits, ppnBits))) + + /** Special Entry + * + * If PMP granularity is less than page 
size, an additional "special" entry is needed to manage PMP. + */ + // val superpage: Boolean = true, val superpageOnly: Boolean = false + val special_entry = Option.when(!pageGranularityPMPs)(Reg(new TLBEntry(1, pgLevels, vpnBits, ppnBits))) + def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries + def all_entries = ordinary_entries ++ special_entry + def allEntries = + sectored_entries(memIdx).map(tlb => (tlb, false, false)) ++ + superpage_entries.map(tlb => (tlb, true, true)) ++ + special_entry.map(tlb => (tlb, true, false)) + + def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry + + val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) + val state = RegInit(s_ready) + // use vpn as refill_tag + val r_refill_tag = Reg(UInt(vpnBits.W)) + val r_superpage_repl_addr = Reg(UInt(log2Ceil(superpage_entries.size).W)) + val r_sectored_repl_addr = Reg(UInt(log2Ceil(sectored_entries.head.size).W)) + val r_sectored_hit = Reg(Valid(UInt(log2Ceil(sectored_entries.head.size).W))) + val r_superpage_hit = Reg(Valid(UInt(log2Ceil(superpage_entries.size).W))) + val r_vstage1_en = Reg(Bool()) + val r_stage2_en = Reg(Bool()) + val r_need_gpa = Reg(Bool()) + val r_gpa_valid = Reg(Bool()) + val r_gpa = Reg(UInt(vaddrBits.W)) + val r_gpa_vpn = Reg(UInt(vpnBits.W)) + val r_gpa_is_pte = Reg(Bool()) + + /** privilege mode */ + val priv = io.req.bits.prv + val priv_v = usingHypervisor.B && io.req.bits.v + val priv_s = priv(0) + // user mode and supervisor mode + val priv_uses_vm = priv <= PRV.S.U + val satp = Mux(priv_v, io.ptw.vsatp, io.ptw.ptbr) + val stage1_en = usingVM.B && satp.mode(satp.mode.getWidth - 1) + + /** VS-stage translation enable */ + val vstage1_en = usingHypervisor.B && priv_v && io.ptw.vsatp.mode(io.ptw.vsatp.mode.getWidth - 1) + + /** G-stage translation enable */ + val stage2_en = usingHypervisor.B && priv_v && io.ptw.hgatp.mode(io.ptw.hgatp.mode.getWidth - 1) + + /** Enable Virtual Memory when: + * 1. statically configured + * 1. satp highest bits enabled + * i. RV32: + * - 0 -> Bare + * - 1 -> SV32 + * i. RV64: + * - 0000 -> Bare + * - 1000 -> SV39 + * - 1001 -> SV48 + * - 1010 -> SV57 + * - 1011 -> SV64 + * 1. In virtualization mode, vsatp highest bits enabled + * 1. priv mode is U or S. + * 1. in H and M modes, VM is disabled. + * 1. no passthrough (micro-arch defined).
+ * + * @see RV-priv spec 4.1.11 Supervisor Address Translation and Protection (satp) Register + * @see RV-priv spec 8.2.18 Virtual Supervisor Address Translation and Protection Register (vsatp) + */ + val vm_enabled = (stage1_en || stage2_en) && priv_uses_vm && !io.req.bits.passthrough + + // flush guest entries on vsatp.MODE Bare <-> SvXX transitions + val v_entries_use_stage1 = RegInit(false.B) + val vsatp_mode_mismatch = priv_v && (vstage1_en =/= v_entries_use_stage1) && !io.req.bits.passthrough + + // share a single physical memory attribute checker (unshare if critical path) + val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits - 1, 0) + + /** refill signal */ + val do_refill = usingVM.B && io.ptw.resp.valid + + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + + /** sfence invalidate refill */ + val invalidate_refill = isOneOf(state, Seq(s_request /* don't care */, s_wait_invalidate)) || io.sfence.valid + // PMP + val mpu_ppn = Mux[UInt]( + do_refill, + refill_ppn, + Mux( + vm_enabled && special_entry.nonEmpty.B, + special_entry.map(e => TLBEntry.ppn(e, vpn, TLBEntry.getData(e, vpn), usingVM, pgLevelBits, true, false)).getOrElse(0.U), + io.req.bits.vaddr >> pgIdxBits + ) + ) + val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits - 1, 0)) + val mpu_priv = + Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */ ), PRV.S.U, Cat(io.ptw.status.debug, priv)) + pmp.io.addr := mpu_physaddr + pmp.io.size := io.req.bits.size + pmp.io.pmp := (io.ptw.pmp: Seq[PMP]) + pmp.io.prv := mpu_priv + // PMA + val pma = Instantiate(new PMAChecker(parameter.pmaCheckerParameter)) + // check that a slave exists that can consume this address. + pma.io.paddr := mpu_physaddr + // todo: when using DataScratchpad, cacheable accesses are not supported.
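+ // Note on sharing: mpu_physaddr time-multiplexes the single checker pair
+ // across three consumers (see the mpu_ppn Mux above):
+ //   1. a PTW refill response, so the new entry caches its PMP/PMA results;
+ //   2. a hit in the sub-page "special" entry, whose PMP result cannot be
+ //      cached because PMP granularity is finer than a page;
+ //   3. an untranslated (VM disabled or passthrough) request, checked live.
+ // One PMPChecker/PMAChecker saves area at the cost of this mux on the
+ // lookup path, hence the "unshare if critical path" remark above.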
+ def checkCacheable: Bool = pma.io.resp.cacheable + def checkR: Bool = pma.io.resp.r + def checkW: Bool = pma.io.resp.w + def checkPP: Bool = pma.io.resp.pp + def checkAL: Bool = pma.io.resp.al + def checkAA: Bool = pma.io.resp.aa + def checkX: Bool = pma.io.resp.x + def checkEFF: Bool = pma.io.resp.eff + + // deny access to the DM address range (debug module program buffer) unless the hart is in debug mode + // @todo val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous + val homogeneous = true.B + // val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B) + val deny_access_to_debug: Bool = false.B + val cacheable: Bool = checkCacheable && (instruction || !usingDataScratchpad).B + val prot_r: Bool = checkR && !deny_access_to_debug && pmp.io.r + val prot_w: Bool = checkW && !deny_access_to_debug && pmp.io.w + val prot_pp: Bool = checkPP + val prot_al: Bool = checkAL + val prot_aa: Bool = checkAA + val prot_x: Bool = checkX && !deny_access_to_debug && pmp.io.x + val prot_eff: Bool = checkEFF + + // hit check + val sector_hits = sectored_entries(memIdx).map(tlbEntry => TLBEntry.sectorHit(tlbEntry, vpn, priv_v)) + val superpage_hits = superpage_entries.map(tlbEntry => TLBEntry.hit(tlbEntry, vpn, priv_v, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = true, superpageOnly = true)) + val hitsVec = VecInit(allEntries.map{case (tlbEntry, superpage, superpageOnly) => vm_enabled && TLBEntry.hit(tlbEntry, vpn, priv_v, usingVM: Boolean, pgLevelBits: Int, hypervisorExtraAddrBits: Int, superpage, superpageOnly)}) + val real_hits = hitsVec.asUInt + val hits = Cat(!vm_enabled, real_hits) + + // use ptw response to refill + // permission bit arrays + when(do_refill) { + val pte = io.ptw.resp.bits.pte + val refill_v = r_vstage1_en || r_stage2_en + val newEntry = Wire(new TLBEntryData(ppnBits)) + newEntry.ppn := pte.ppn + newEntry.c := cacheable + newEntry.u := pte.u + newEntry.g := pte.g && pte.v + newEntry.ae_ptw := io.ptw.resp.bits.ae_ptw + newEntry.ae_final := io.ptw.resp.bits.ae_final + newEntry.ae_stage2 := io.ptw.resp.bits.ae_final && io.ptw.resp.bits.gpa_is_pte && r_stage2_en + newEntry.pf := io.ptw.resp.bits.pf + newEntry.gf := io.ptw.resp.bits.gf + newEntry.hr := io.ptw.resp.bits.hr + newEntry.hw := io.ptw.resp.bits.hw + newEntry.hx := io.ptw.resp.bits.hx + newEntry.sr := PTE.sr(pte) + newEntry.sw := PTE.sw(pte) + newEntry.sx := PTE.sx(pte) + newEntry.pr := prot_r + newEntry.pw := prot_w + newEntry.px := prot_x + newEntry.ppp := prot_pp + newEntry.pal := prot_al + newEntry.paa := prot_aa + newEntry.eff := prot_eff + newEntry.fragmented_superpage := io.ptw.resp.bits.fragmented_superpage + // refill special_entry + when(special_entry.nonEmpty.B && !io.ptw.resp.bits.homogeneous) { + special_entry.foreach(tlbEntry => TLBEntry.insert(tlbEntry, r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry, superpageOnly = false)) + }.elsewhen(io.ptw.resp.bits.level < (pgLevels - 1).U) { + val waddr = Mux(r_superpage_hit.valid && usingHypervisor.B, r_superpage_hit.bits, r_superpage_repl_addr) + for ((e, i) <- superpage_entries.zipWithIndex) when(waddr === i.U) { + TLBEntry.insert(e, r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry, superpageOnly = true) + when(invalidate_refill) { + TLBEntry.invalidate(e) + } + } + // refill sectored_hit + }.otherwise { + val r_memIdx = if(log2Ceil(cfg.nSets) == 0) 0.U else (r_refill_tag(log2Ceil(cfg.nSectors) +
log2Ceil(cfg.nSets) - 1, log2Ceil(cfg.nSectors))) + val waddr = Mux(r_sectored_hit.valid, r_sectored_hit.bits, r_sectored_repl_addr) + for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when(waddr === i.U) { + when(!r_sectored_hit.valid) { TLBEntry.invalidate(e) } + TLBEntry.insert(e, r_refill_tag, refill_v, 0.U, newEntry, superpageOnly = false) + when(invalidate_refill) { TLBEntry.invalidate(e) } + } + } + + r_gpa_valid := io.ptw.resp.bits.gpa.valid + r_gpa := io.ptw.resp.bits.gpa.bits + r_gpa_is_pte := io.ptw.resp.bits.gpa_is_pte + } + + // get each entry's data. + val entries = all_entries.map(tlbEntry => TLBEntry.getData(tlbEntry, vpn)) + val normal_entries = entries.take(ordinary_entries.size) + // query the PPN from [[all_entries]] in parallel; if VM is not enabled, return the VPN instead + val ppn = Mux1H( + hitsVec :+ !vm_enabled, + allEntries.zip(entries).map { case ((entry, superpage, superpageOnly), data) => TLBEntry.ppn(entry, vpn, data, usingVM, pgLevelBits: Int, superpage, superpageOnly) } :+ vpn(ppnBits - 1, 0) + ) + + val nPhysicalEntries = 1 + special_entry.size + // access-exception bits from the PTW: ae_ptw is raised during the walk, ae_final at the final PTE. + val ptw_ae_array = Cat(false.B, VecInit(entries.map(_.ae_ptw)).asUInt) + val final_ae_array = Cat(false.B, VecInit(entries.map(_.ae_final)).asUInt) + val ptw_pf_array = Cat(false.B, VecInit(entries.map(_.pf)).asUInt) + val ptw_gf_array = Cat(false.B, VecInit(entries.map(_.gf)).asUInt) + val sum = Mux(priv_v, io.ptw.gstatus.sum, io.ptw.status.sum) + // supervisor mode cannot read/write user pages unless SUM is set: + // "If the SUM bit in the sstatus register is set, supervisor mode software may also access pages with U=1." (from spec) + val priv_rw_ok = Mux(!priv_s || sum, VecInit(entries.map(_.u)).asUInt, 0.U) | Mux(priv_s, ~VecInit(entries.map(_.u)).asUInt, 0.U) + // in supervisor mode, only non-user pages are executable; + // in user mode, only user pages are executable. + val priv_x_ok = Mux(priv_s, ~VecInit(entries.map(_.u)).asUInt, VecInit(entries.map(_.u)).asUInt) + val stage1_bypass = + Fill(entries.size, usingHypervisor.B) & (Fill(entries.size, !stage1_en) | VecInit(entries.map(_.ae_stage2)).asUInt) + val mxr = io.ptw.status.mxr | Mux(priv_v, io.ptw.gstatus.mxr, false.B) + // "The vsstatus field MXR, which makes execute-only pages readable, only overrides VS-stage page protection." (from spec) + val r_array = + Cat(true.B, (priv_rw_ok & (VecInit(entries.map(_.sr)).asUInt | Mux(mxr, VecInit(entries.map(_.sx)).asUInt, 0.U))) | stage1_bypass) + val w_array = Cat(true.B, (priv_rw_ok & VecInit(entries.map(_.sw)).asUInt) | stage1_bypass) + val x_array = Cat(true.B, (priv_x_ok & VecInit(entries.map(_.sx)).asUInt) | stage1_bypass) + val stage2_bypass = Fill(entries.size, !stage2_en) + val hr_array = + Cat(true.B, VecInit(entries.map(_.hr)).asUInt | Mux(io.ptw.status.mxr, VecInit(entries.map(_.hx)).asUInt, 0.U) | stage2_bypass) + val hw_array = Cat(true.B, VecInit(entries.map(_.hw)).asUInt | stage2_bypass) + val hx_array = Cat(true.B, VecInit(entries.map(_.hx)).asUInt | stage2_bypass) + // These arrays have one bit per TLB entry.
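+ // Shared bit layout of the permission arrays above and below, matching
+ // `hits` (LSB first):
+ //   [ordinary_entries.size - 1 : 0]  sectored + superpage entries;
+ //   next bit (when present)          the sub-page "special" entry;
+ //   MSB                              the VM-disabled passthrough case.
+ // The virtual-permission arrays (r/w/x/hr/hw/hx) hold true.B in the MSB,
+ // since page permissions do not apply without translation, while the
+ // physical arrays below take their upper bits from the live PMP/PMA
+ // results (prot_*). ANDing any array with `hits` selects the permissions
+ // of whichever entry (or the passthrough) translated this request.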
+ // can read: PMA OK, TLB OK, AE OK + val pr_array = Cat(Fill(nPhysicalEntries, prot_r), VecInit(normal_entries.map(_.pr)).asUInt) & ~(ptw_ae_array | final_ae_array) + // can write: PMA OK, TLB OK, AE OK + val pw_array = Cat(Fill(nPhysicalEntries, prot_w), VecInit(normal_entries.map(_.pw)).asUInt) & ~(ptw_ae_array | final_ae_array) + // can execute: PMA OK, TLB OK, AE OK + val px_array = Cat(Fill(nPhysicalEntries, prot_x), VecInit(normal_entries.map(_.px)).asUInt) & ~(ptw_ae_array | final_ae_array) + // access has get/put side effects + val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), VecInit(normal_entries.map(_.eff)).asUInt) + // cacheable + val c_array = Cat(Fill(nPhysicalEntries, cacheable), VecInit(normal_entries.map(_.c)).asUInt) + // supports partial PUT (masked store) + val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), VecInit(normal_entries.map(_.ppp)).asUInt) + // atomic arithmetic + val paa_array = Cat(Fill(nPhysicalEntries, prot_aa), VecInit(normal_entries.map(_.paa)).asUInt) + // atomic logic + val pal_array = Cat(Fill(nPhysicalEntries, prot_al), VecInit(normal_entries.map(_.pal)).asUInt) + val ppp_array_if_cached = ppp_array | c_array + val paa_array_if_cached = paa_array | (if (usingAtomicsInCache) c_array else 0.U) + val pal_array_if_cached = pal_array | (if (usingAtomicsInCache) c_array else 0.U) + val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries - 1), VecInit(normal_entries.map(_.c)).asUInt) + + // vaddr is misaligned if any address bit below the access size (log2 bytes) is set + val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR + def badVA(guestPA: Boolean): Bool = { + val additionalPgLevels = PTBR.additionalPgLevels(if (guestPA) io.ptw.hgatp else satp, pgLevels, minPgLevels) + val extraBits = if (guestPA) hypervisorExtraAddrBits else 0 + val signed = !guestPA + val nPgLevelChoices = pgLevels - minPgLevels + 1 + val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits + VecInit((for (i <- 0 until nPgLevelChoices) yield { + val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - (if(signed) 1 else 0)))).U + val maskedVAddr = io.req.bits.vaddr & mask + additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask) + })).asUInt.orR + } + val bad_gpa = + if (!usingHypervisor) false.B + else vm_enabled && !stage1_en && badVA(true) + val bad_va = + if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B + else vm_enabled && stage1_en && badVA(false) + + val cmd_lrsc = usingAtomics.B && isOneOf(io.req.bits.cmd, Seq(M_XLR, M_XSC)) + def isAMOLogical(cmd: UInt) = isOneOf(cmd, Seq(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND)) + val cmd_amo_logical = usingAtomics.B && isAMOLogical(io.req.bits.cmd) + def isAMOArithmetic(cmd: UInt) = isOneOf(cmd, Seq(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU)) + val cmd_amo_arithmetic = usingAtomics.B && isAMOArithmetic(io.req.bits.cmd) + val cmd_put_partial = io.req.bits.cmd === M_PWR + def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd) + def isRead(cmd: UInt) = isOneOf(cmd, Seq(M_XRD, M_HLVX, M_XLR, M_XSC)) || isAMO(cmd) + val cmd_read = isRead(io.req.bits.cmd) + val cmd_readx = usingHypervisor.B && io.req.bits.cmd === M_HLVX + def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd) + val cmd_write = isWrite(io.req.bits.cmd) + val cmd_write_perms = cmd_write || + isOneOf(io.req.bits.cmd, Seq(M_FLUSH_ALL, M_WOK)) // not a write, but needs write permissions + + val lrscAllowed =
Mux((usingDataScratchpad || usingAtomicsOnlyForIO).B, 0.U, c_array) + val ae_array = + Mux(misaligned, eff_array, 0.U) | + Mux(cmd_lrsc, ~lrscAllowed, 0.U) + + // access exception needs SoC information from PMA + val ae_ld_array = Mux(cmd_read, ae_array | ~pr_array, 0.U) + val ae_st_array = + Mux(cmd_write_perms, ae_array | ~pw_array, 0.U) | + Mux(cmd_put_partial, ~ppp_array_if_cached, 0.U) | + Mux(cmd_amo_logical, ~pal_array_if_cached, 0.U) | + Mux(cmd_amo_arithmetic, ~paa_array_if_cached, 0.U) + val must_alloc_array = + Mux(cmd_put_partial, ~ppp_array, 0.U) | + Mux(cmd_amo_logical, ~pal_array, 0.U) | + Mux(cmd_amo_arithmetic, ~paa_array, 0.U) | + Mux(cmd_lrsc, ~0.U(pal_array.getWidth.W), 0.U) + val pf_ld_array = + Mux(cmd_read, ((~Mux(cmd_readx, x_array, r_array) & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U) + val pf_st_array = Mux(cmd_write_perms, ((~w_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U) + val pf_inst_array = ((~x_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array + val gf_ld_array = Mux(priv_v && cmd_read, ~Mux(cmd_readx, hx_array, hr_array) & ~ptw_ae_array, 0.U) + val gf_st_array = Mux(priv_v && cmd_write_perms, ~hw_array & ~ptw_ae_array, 0.U) + val gf_inst_array = Mux(priv_v, ~hx_array & ~ptw_ae_array, 0.U) + + val gpa_hits = { + val need_gpa_mask = if (instruction) gf_inst_array else gf_ld_array | gf_st_array + val hit_mask = Fill(ordinary_entries.size, r_gpa_valid && r_gpa_vpn === vpn) | Fill(all_entries.size, !vstage1_en) + hit_mask | ~need_gpa_mask(all_entries.size - 1, 0) + } + + val tlb_hit_if_not_gpa_miss = real_hits.orR + val tlb_hit = (real_hits & gpa_hits).orR + // leads to s_request + val tlb_miss = vm_enabled && !vsatp_mode_mismatch && !bad_va && !tlb_hit + + val sectored_plru = new SetAssocLRU(cfg.nSets, sectored_entries.head.size, "plru") + val superpage_plru = new PseudoLRU(superpage_entries.size) + when(io.req.valid && vm_enabled) { + // replace + when(VecInit(sector_hits).asUInt.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) } + when(VecInit(superpage_hits).asUInt.orR) { superpage_plru.access(OHToUInt(superpage_hits)) } + } + + // Superpages create the possibility that two entries in the TLB may match. + // This corresponds to a software bug, but we can't return complete garbage; + // we must return either the old translation or the new translation. This + // isn't compatible with the Mux1H approach. So, flush the TLB and report + // a miss on duplicate entries. + val multipleHits = PopCountAtLeast(real_hits, 2) + + // only pull up req.ready when this is s_ready state. 
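+ // Aside: multipleHits above uses PopCountAtLeast(real_hits, 2) (defined
+ // near the top of this module) rather than a full PopCount compare, so no
+ // adder tree is built. A software model of that recursion, for intuition
+ // only (plain Scala; the hardware version operates on a UInt):
+ //   def atLeastTwo(bits: Seq[Boolean]): Boolean = {
+ //     def two(x: Seq[Boolean]): (Boolean, Boolean) = x match {
+ //       case Seq(b) => (b, false)
+ //       case _ =>
+ //         val (lo, hi) = x.splitAt(x.length / 2)
+ //         val (loOne, loTwo) = two(lo)
+ //         val (hiOne, hiTwo) = two(hi)
+ //         (loOne || hiOne, loTwo || hiTwo || (loOne && hiOne))
+ //     }
+ //     two(bits)._2
+ //   }
+ //   atLeastTwo(Seq(true, false, true))  // true: duplicate hit, flush TLB
+ //   atLeastTwo(Seq(false, true, false)) // false: a normal single hit
+ // A hedged overview of the refill FSM (inferred from the transitions
+ // below): s_ready is the only state that accepts requests and serves hits
+ // in the same cycle; a miss latches the VPN into r_refill_tag and moves to
+ // s_request; s_request issues the PTW request unless killed or fenced;
+ // s_wait waits for the PTW response, which is then refilled;
+ // s_wait_invalidate also waits but drops the refilled entry, because an
+ // SFENCE arrived while the walk was in flight (see invalidate_refill).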
+ io.req.ready := state === s_ready + // page fault + io.resp.pf.ld := (bad_va && cmd_read) || (pf_ld_array & hits).orR + io.resp.pf.st := (bad_va && cmd_write_perms) || (pf_st_array & hits).orR + io.resp.pf.inst := bad_va || (pf_inst_array & hits).orR + // guest page fault + io.resp.gf.ld := (bad_gpa && cmd_read) || (gf_ld_array & hits).orR + io.resp.gf.st := (bad_gpa && cmd_write_perms) || (gf_st_array & hits).orR + io.resp.gf.inst := bad_gpa || (gf_inst_array & hits).orR + // access exception + io.resp.ae.ld := (ae_ld_array & hits).orR + io.resp.ae.st := (ae_st_array & hits).orR + io.resp.ae.inst := (~px_array & hits).orR + // misaligned + io.resp.ma.ld := misaligned && cmd_read + io.resp.ma.st := misaligned && cmd_write + io.resp.ma.inst := false.B // this is up to the pipeline to figure out + io.resp.cacheable := (c_array & hits).orR + io.resp.must_alloc := (must_alloc_array & hits).orR + + // io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers + // .forall(m => !m.supportsAcquireB || m.supportsHint) + // .B + // prefetch range + io.resp.prefetchable := (prefetchable_array & hits).orR + io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits + io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits - 1, 0)) + io.resp.gpa_is_pte := vstage1_en && r_gpa_is_pte + io.resp.gpa := { + val page = Mux(!vstage1_en, Cat(bad_gpa, vpn), r_gpa >> pgIdxBits) + val offset = Mux(io.resp.gpa_is_pte, r_gpa(pgIdxBits - 1, 0), io.req.bits.vaddr(pgIdxBits - 1, 0)) + Cat(page, offset) + } + + io.ptw.req.valid := state === s_request + io.ptw.req.bits.valid := !io.kill + io.ptw.req.bits.bits.addr := r_refill_tag + io.ptw.req.bits.bits.vstage1 := r_vstage1_en + io.ptw.req.bits.bits.stage2 := r_stage2_en + io.ptw.req.bits.bits.need_gpa := r_need_gpa + + if (usingVM) { + when(io.ptw.req.fire && io.ptw.req.bits.valid) { + r_gpa_valid := false.B + r_gpa_vpn := r_refill_tag + } + + val sfence = io.sfence.valid + // this is [[s_ready]]: + // handle miss/hit in the first cycle; + // on a miss, send a request to the PTW (L2 TLB). + when(io.req.fire && tlb_miss) { + state := s_request + r_refill_tag := vpn + r_need_gpa := tlb_hit_if_not_gpa_miss + r_vstage1_en := vstage1_en + r_stage2_en := stage2_en + r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way) + r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx)) + r_sectored_hit.valid := VecInit(sector_hits).asUInt.orR + r_sectored_hit.bits := OHToUInt(sector_hits) + r_superpage_hit.valid := VecInit(superpage_hits).asUInt.orR + r_superpage_hit.bits := OHToUInt(superpage_hits) + } + // Handle SFENCE.VMA while sending a request to the PTW. + // SFENCE.VMA io.ptw.req.ready kill + // ? ? 1 -> s_ready + // 0 0 0 -> s_request (stay) + // 0 1 0 -> s_wait + // 1 1 0 -> s_wait_invalidate + // 1 0 0 -> s_ready + when(state === s_request) { + // SFENCE.VMA will kill TLB entries based on rs1 and rs2. It will take 1 cycle. + when(sfence) { state := s_ready } + // this should be io.ptw.req.fire, but io.ptw.req.ready is asserted to always be true + // fire -> s_wait + when(io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) } + // if the CPU kills the request (frontend.s2_redirect) + when(io.kill) { state := s_ready } + } + // an sfence during refill results in invalidation + when(state === s_wait && sfence) { + state := s_wait_invalidate + } + // after the PTW response arrives, go back to s_ready. + when(io.ptw.resp.valid) { + state := s_ready + } + + // SFENCE processing logic.
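+ // A hedged summary of the invalidation cases below, following the priv
+ // spec's SFENCE.VMA semantics:
+ //   rs1 set            -> invalidate only entries whose tag matches the
+ //                         VPN of io.sfence.bits.addr (leaf-level fence);
+ //   rs1 clear, rs2 set -> invalidate all non-global entries (these
+ //                         entries carry no ASID tag, so an ASID-specific
+ //                         fence degrades to this);
+ //   neither            -> invalidate all non-global entries;
+ // hv/hg widen the same cases to HFENCE.VVMA / HFENCE.GVMA respectively.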
+ when(sfence) { + assert(!io.sfence.bits.rs1 || (io.sfence.bits.addr >> pgIdxBits) === vpn) + val hv = usingHypervisor.B && io.sfence.bits.hv + val hg = usingHypervisor.B && io.sfence.bits.hg + sectored_entries.flatten.foreach{ e => + when(!hg && io.sfence.bits.rs1) { TLBEntry.invalidateVPN(e, vpn, hv, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = false, superpageOnly = false) } + .elsewhen(!hg && io.sfence.bits.rs2) { TLBEntry.invalidateNonGlobal(e, hv) } + .otherwise { TLBEntry.invalidateNonGlobal(e, hv || hg) } + } + superpage_entries.foreach { e => + when(!hg && io.sfence.bits.rs1) { TLBEntry.invalidateVPN(e, vpn, hv, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = true, superpageOnly = true) } + .elsewhen(!hg && io.sfence.bits.rs2) { TLBEntry.invalidateNonGlobal(e, hv) } + .otherwise { TLBEntry.invalidateNonGlobal(e, hv || hg) } + } + special_entry.foreach { e => + when(!hg && io.sfence.bits.rs1) { TLBEntry.invalidateVPN(e, vpn, hv, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = true, superpageOnly = false) } + .elsewhen(!hg && io.sfence.bits.rs2) { TLBEntry.invalidateNonGlobal(e, hv) } + .otherwise { TLBEntry.invalidateNonGlobal(e, hv || hg) } + } + } + when(io.req.fire && vsatp_mode_mismatch) { + all_real_entries.foreach(tlbEntry => TLBEntry.invalidate(tlbEntry, true.B)) + v_entries_use_stage1 := vstage1_en + } + when(multipleHits || io.reset.asBool) { + all_real_entries.foreach(tlbEntry => TLBEntry.invalidate(tlbEntry)) + } + } + + /** Decides which entry to replace. + * + * If there is an invalid entry, pick it with a priority encoder; + * otherwise, replace the entry given by `alt`. + * + * @return index of the TLBEntry to replace + */ + def replacementEntry(set: Seq[TLBEntry], alt: UInt) = { + val valids = VecInit(set.map(_.valid.asUInt.orR)).asUInt + Mux(valids.andR, alt, PriorityEncoder(~valids)) + } +} diff --git a/rocketv/src/csr/V.scala b/rocketv/src/csr/V.scala new file mode 100644 index 000000000..944448f13 --- /dev/null +++ b/rocketv/src/csr/V.scala @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu + +package org.chipsalliance.rocketv.csr + +import chisel3._ +import chisel3.util.log2Ceil + +// context for Vector +class V(vlen: Int, hypervisor: Boolean) { + require(Module.currentModule.isDefined) + def vlWidth: Int = log2Ceil(vlen) + 1 + def vlenbWidth = log2Ceil(vlen / 8) + val contents: Seq[String] = Seq( + "misa.V", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#32-vector-context-status-in-mstatus + "mstatus.VS", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#33-vector-context-status-in-vsstatus + "vsstatus.VS", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#341-vector-selected-element-width-vsew20 + "vsew", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#342-vector-register-grouping-vlmul20 + "vlmul", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#343-vector-tail-agnostic-and-vector-mask-agnostic-vta-and-vma + "vta", + "vma", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#344-vector-type-illegal-vill + "vill", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#35-vector-length-register-vl + "vl", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#36-vector-byte-length-vlenb + "vlenb", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#37-vector-start-index-csr-vstart + "vstart", + // 
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#38-vector-fixed-point-rounding-mode-register-vxrm + "vxrm", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#39-vector-fixed-point-saturation-flag-vxsat + "vxsat" + ) + def chiselType(content: String): Data = content match { + case "misa.V" => Bool() + case "mstatus.VS" => UInt(2.W) + case "vsstatus.VS" => UInt(2.W) + case "vlmul" => UInt(3.W) + case "vsew" => UInt(3.W) + case "vta" => Bool() + case "vma" => Bool() + case "vill" => Bool() + case "vl" => UInt(vlWidth.W) + case "vlenb" => UInt(vlenbWidth.W) + case "vstart" => UInt(vlWidth.W) + case "vxrm" => UInt(2.W) + case "vxsat" => UInt(2.W) + } + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#311-state-of-vector-extension-at-reset + def reset(content: String): Option[UInt] = content match { + // 1 -> Initial; 2 -> Clean; 3 -> Dirty + case "mstatus.VS" => Some(0.U) + // It is recommended that at reset, vtype.vill is set, the remaining bits in vtype are zero, and vl is set to zero. + case "vlmul" => Some(0.U) + case "vsew" => Some(0.U) + case "vta" => Some(false.B) + case "vma" => Some(false.B) + case "vill" => Some(true.B) + // The vector extension must have a consistent state at reset. In particular, vtype and vl must have values that can be read and then restored with a single vsetvl instruction. + case "vl" => Some(0.U) + // The vstart, vxrm, vxsat CSRs can have arbitrary values at reset. + case _ => None + } + def constant(content: String): Option[UInt] = content match { + // MISA in Rocket is not writable. + case "misa.V" => Some(true.B) + case "vlenb" => Some((vlen / 8).U) + case _ => None + } + + val states: Map[String, UInt] = + (Seq( + "mstatus.VS", + "vsew", + "vlmul", + "vta", + "vma", + "vill", + "vl", + "vstart", + "vxrm", + "vxsat" + ) ++ Option.when(hypervisor)( + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#33-vector-context-status-in-vsstatus + "vsstatus.VS" + )).map { content: String => + content -> + reset(content) + .map(resetValue => RegInit(resetValue)) + .getOrElse(Reg(chiselType(content))) + .suggestName(content) + .asUInt + }.toMap + + val constants: Map[String, UInt] = Seq( + // MISA in Rocket is not writable + "misa.V", + "vlenb" + ).map { content: String => + content -> constant(content).get + }.toMap +} diff --git a/rocketv/src/fpu/FPToFP.scala b/rocketv/src/fpu/FPToFP.scala new file mode 100644 index 000000000..1762877d2 --- /dev/null +++ b/rocketv/src/fpu/FPToFP.scala @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, Pipe, Valid} + +object FPToFPParameter { + implicit def rwP: upickle.default.ReadWriter[FPToFPParameter] = upickle.default.macroRW[FPToFPParameter] +} + +case class FPToFPParameter( + useAsyncReset: Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int) + extends SerializableModuleParameter + +class FPToFPInterface(parameter: FPToFPParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new FPInput(parameter.fLen))) + val out = Valid(new FPResult(parameter.fLen)) + val lt = 
Input(Bool()) // from FPToInt +} + +@instantiable +class FPToFP(val parameter: FPToFPParameter) + extends FixedIORawModule(new FPToFPInterface(parameter)) + with SerializableModule[FPToFPParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val fLen = parameter.fLen + val minFLen = parameter.minFLen + val xLen = parameter.xLen + val latency = parameter.latency + val helper = new FPUHelper(minFLen, fLen, xLen) + val maxType = helper.maxType + val floatTypes = helper.floatTypes + def typeTag(t: FType) = helper.typeTag(t) + def sanitizeNaN(x: UInt, t: FType) = helper.sanitizeNaN(x, t) + + val in = Pipe(io.in) + + val signNum = Mux(in.bits.rm(1), in.bits.in1 ^ in.bits.in2, Mux(in.bits.rm(0), ~in.bits.in2, in.bits.in2)) + val fsgnj = Cat(signNum(fLen), in.bits.in1(fLen - 1, 0)) + + val fsgnjMux = Wire(new FPResult(parameter.fLen)) + fsgnjMux.exc := 0.U + fsgnjMux.data := fsgnj + + when(in.bits.fpuControl.wflags) { // fmin/fmax + val isnan1 = maxType.isNaN(in.bits.in1) + val isnan2 = maxType.isNaN(in.bits.in2) + val isInvalid = maxType.isSNaN(in.bits.in1) || maxType.isSNaN(in.bits.in2) + val isNaNOut = isnan1 && isnan2 + val isLHS = isnan2 || in.bits.rm(0) =/= io.lt && !isnan1 + fsgnjMux.exc := isInvalid << 4 + fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, in.bits.in1, in.bits.in2)) + } + + val inTag = in.bits.fpuControl.typeTagIn + val outTag = in.bits.fpuControl.typeTagOut + val mux = WireDefault(fsgnjMux) + for (t <- floatTypes.init) { + when(outTag === typeTag(t).U) { + mux.data := Cat(fsgnjMux.data >> t.recodedWidth, maxType.unsafeConvert(fsgnjMux.data, t)) + } + } + + when(in.bits.fpuControl.wflags && !in.bits.fpuControl.ren2) { // fcvt + if (floatTypes.size > 1) { + // widening conversions simply canonicalize NaN operands + val widened = Mux(maxType.isNaN(in.bits.in1), maxType.qNaN, in.bits.in1) + fsgnjMux.data := widened + fsgnjMux.exc := maxType.isSNaN(in.bits.in1) << 4 + + // narrowing conversions require rounding (for RVQ, this could be + // optimized to use a single variable-position rounding unit, rather + // than two fixed-position ones) + for (outType <- floatTypes.init) + when(outTag === typeTag(outType).U && ((typeTag(outType) == 0).B || outTag < inTag)) { + val narrower = Module(new hardfloat.RecFNToRecFN(maxType.exp, maxType.sig, outType.exp, outType.sig)) + narrower.io.in := in.bits.in1 + narrower.io.roundingMode := in.bits.rm + narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding + val narrowed = sanitizeNaN(narrower.io.out, outType) + mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed) + mux.exc := narrower.io.exceptionFlags + } + } + } + + io.out <> Pipe(in.valid, mux, latency - 1) +} diff --git a/rocketv/src/fpu/FPToInt.scala b/rocketv/src/fpu/FPToInt.scala new file mode 100644 index 000000000..9faa3f3ef --- /dev/null +++ b/rocketv/src/fpu/FPToInt.scala @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +object FPToIntParameter { + implicit def rwP: upickle.default.ReadWriter[FPToIntParameter] = 
upickle.default.macroRW[FPToIntParameter] +} + +case class FPToIntParameter( + useAsyncReset: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int) + extends SerializableModuleParameter +class FPToIntInterface(parameter: FPToIntParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new FPInput(parameter.fLen))) + val out = Valid(new FPToIntOutput(parameter.fLen, parameter.xLen)) +} + +@instantiable +class FPToInt(val parameter: FPToIntParameter) + extends FixedIORawModule(new FPToIntInterface(parameter)) + with SerializableModule[FPToIntParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val minFLen: Int = parameter.minFLen + val fLen: Int = parameter.fLen + val xLen: Int = parameter.xLen + val helper = new FPUHelper(minFLen, fLen, xLen) + val maxExpWidth = helper.maxExpWidth + val maxSigWidth = helper.maxSigWidth + val floatTypes = helper.floatTypes + val maxType = helper.maxType + val minXLen = helper.minXLen + val nIntTypes = helper.nIntTypes + def ieee(x: UInt, t: FType = maxType) = helper.ieee(x, t) + + val in = RegEnable(io.in.bits, io.in.valid) + val valid = RegNext(io.in.valid) + + def sextTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x) + } + + val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth)) + dcmp.io.a := in.in1 + dcmp.io.b := in.in2 + dcmp.io.signaling := !in.rm(1) + + val tag = in.fpuControl.typeTagOut + val store = VecInit( + floatTypes.map(t => + if (t == FType.H) Fill(maxType.ieeeWidth / minXLen, sextTo(ieee(in.in1)(15, 0), minXLen)) + else Fill(maxType.ieeeWidth / t.ieeeWidth, ieee(in.in1)(t.ieeeWidth - 1, 0)) + ) + )(tag) + val toint = WireDefault(store) + val intType = WireDefault(in.fmt(0)) + io.out.bits.store := store + io.out.bits.toint := VecInit( + (0 until helper.nIntTypes).map(i => sextTo(toint((helper.minXLen << i) - 1, 0), xLen)): Seq[UInt] + )(intType) + io.out.bits.exc := 0.U + + when(in.rm(0)) { + val classify_out = VecInit(floatTypes.map(t => t.classify(maxType.unsafeConvert(in.in1, t))))(tag) + toint := classify_out | (store >> minXLen << minXLen) + intType := false.B + } + + when(in.fpuControl.wflags) { // feq/flt/fle, fcvt + toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR | (store >> minXLen << minXLen) + io.out.bits.exc := dcmp.io.exceptionFlags + intType := false.B + + when(!in.fpuControl.ren2) { // fcvt + val cvtType = if (log2Ceil(nIntTypes) == 0) 0.U else in.typ(log2Ceil(nIntTypes), 1) + intType := cvtType + val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, xLen)) + conv.io.in := in.in1 + conv.io.roundingMode := in.rm + conv.io.signedOut := ~in.typ(0) + toint := conv.io.out + io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, 0.U(3.W), conv.io.intExceptionFlags(0)) + + for (i <- 0 until nIntTypes - 1) { + val w = minXLen << i + when(cvtType === i.U) { + val narrow = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, w)) + narrow.io.in := in.in1 + narrow.io.roundingMode := in.rm + narrow.io.signedOut := ~in.typ(0) + + val excSign = in.in1(maxExpWidth + maxSigWidth) && !maxType.isNaN(in.in1) + val excOut = Cat(conv.io.signedOut === excSign, Fill(w - 1, !excSign)) + val invalid = conv.io.intExceptionFlags(2) || narrow.io.intExceptionFlags(1) + when(invalid) { toint := Cat(conv.io.out >> w, 
excOut) } + io.out.bits.exc := Cat(invalid, 0.U(3.W), !invalid && conv.io.intExceptionFlags(0)) + } + } + } + } + + io.out.valid := valid + io.out.bits.lt := dcmp.io.lt || (dcmp.io.a.asSInt < 0.S && dcmp.io.b.asSInt >= 0.S) + io.out.bits.in := in +} diff --git a/rocketv/src/fpu/FPUFMAPipe.scala b/rocketv/src/fpu/FPUFMAPipe.scala new file mode 100644 index 000000000..761ffd5a4 --- /dev/null +++ b/rocketv/src/fpu/FPUFMAPipe.scala @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Pipe, Valid} + +object FPUFMAPipeParameter { + implicit def rwP: upickle.default.ReadWriter[FPUFMAPipeParameter] = upickle.default.macroRW[FPUFMAPipeParameter] +} + +case class FPUFMAPipeParameter( + useAsyncReset: Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int, + t: FType) + extends SerializableModuleParameter { + require(latency > 0) +} + +class FPUFMAPipeInterface(parameter: FPUFMAPipeParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new FPInput(parameter.fLen))) + val out = Valid(new FPResult(parameter.fLen)) +} + +@instantiable +class FPUFMAPipe(val parameter: FPUFMAPipeParameter) + extends FixedIORawModule(new FPUFMAPipeInterface(parameter)) + with SerializableModule[FPUFMAPipeParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val fLen = parameter.fLen + val t = parameter.t + val minFLen: Int = parameter.minFLen + val xLen: Int = parameter.xLen + val latency: Int = parameter.latency + val helper = new FPUHelper(minFLen, fLen, xLen) + def sanitizeNaN(x: UInt, t: FType): UInt = helper.sanitizeNaN(x, t) + + val valid = RegNext(io.in.valid) + val in = Reg(new FPInput(fLen)) + when(io.in.valid) { + val one = 1.U << (t.sig + t.exp - 1) + val zero = (io.in.bits.in1 ^ io.in.bits.in2) & (1.U << (t.sig + t.exp)) + val cmd_fma = io.in.bits.fpuControl.ren3 + val cmd_addsub = io.in.bits.fpuControl.swap23 + in := io.in.bits + when(cmd_addsub) { in.in2 := one } + when(!(cmd_fma || cmd_addsub)) { in.in3 := zero } + } + + val fma: Instance[MulAddRecFNPipe] = Instantiate( + new MulAddRecFNPipe(MulAddRecFNPipeParameter(parameter.useAsyncReset, (latency - 1).min(2), t.exp, t.sig)) + ) + fma.io.clock := io.clock + fma.io.reset := io.reset + fma.io.validin := valid + fma.io.op := in.fmaCmd + fma.io.roundingMode := in.rm + fma.io.detectTininess := hardfloat.consts.tininess_afterRounding + fma.io.a := in.in1 + fma.io.b := in.in2 + fma.io.c := in.in3 + + val res = Wire(new FPResult(parameter.fLen)) + res.data := sanitizeNaN(fma.io.out, t) + res.exc := fma.io.exceptionFlags + + io.out := Pipe(fma.io.validout, res, (latency - 3).max(0)) +} diff --git a/rocketv/src/fpu/IntToFP.scala b/rocketv/src/fpu/IntToFP.scala new file mode 100644 index 000000000..e2be64c01 --- /dev/null +++ b/rocketv/src/fpu/IntToFP.scala @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 
SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, Pipe, Valid, log2Ceil} + +object IntToFPParameter { + implicit def rwP: upickle.default.ReadWriter[IntToFPParameter] = upickle.default.macroRW[IntToFPParameter] +} + +case class IntToFPParameter( + useAsyncReset: Boolean, + latency: Int, + fLen: Int, + xLen: Int, + minFLen: Int) + extends SerializableModuleParameter { + val minXLen = 32 +} +class IntToFPInterface(parameter: IntToFPParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new IntToFPInput(parameter.xLen))) + val out = Valid(new FPResult(parameter.fLen)) +} + +@instantiable +class IntToFP(val parameter: IntToFPParameter) + extends FixedIORawModule(new IntToFPInterface(parameter)) + with SerializableModule[IntToFPParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + // retime + val latency: Int = parameter.latency + val fLen: Int = parameter.fLen + val minFLen: Int = parameter.minFLen + val minXLen: Int = parameter.minXLen + val xLen: Int = parameter.xLen + val helper = new FPUHelper(minFLen: Int, fLen: Int, xLen: Int) + def recode(x: UInt, tag: UInt) = helper.recode(x, tag) + val nIntTypes: Int = helper.nIntTypes + val floatTypes: Seq[FType] = helper.floatTypes + def sanitizeNaN(x: UInt, t: FType) = helper.sanitizeNaN(x, t) + + val in = Pipe(io.in) + val tag = in.bits.fpuControl.typeTagIn + + val mux = Wire(new FPResult(fLen)) + mux.exc := 0.U + mux.data := recode(in.bits.in1, tag) + + val intValue = { + val res = WireDefault(in.bits.in1.asSInt) + for (i <- 0 until nIntTypes - 1) { + val smallInt = in.bits.in1((minXLen << i) - 1, 0) + when(in.bits.typ(log2Ceil(nIntTypes), 1) === i.U) { + res := Mux(in.bits.typ(0), smallInt.zext, smallInt.asSInt) + } + } + res.asUInt + } + + when(in.bits.fpuControl.wflags) { // fcvt + // could be improved for RVD/RVQ with a single variable-position rounding + // unit, rather than N fixed-position ones + val i2fResults = for (t <- floatTypes) yield { + val i2f = Module(new hardfloat.INToRecFN(xLen, t.exp, t.sig)) + i2f.io.signedIn := ~in.bits.typ(0) + i2f.io.in := intValue + i2f.io.roundingMode := in.bits.rm + i2f.io.detectTininess := hardfloat.consts.tininess_afterRounding + (sanitizeNaN(i2f.io.out, t), i2f.io.exceptionFlags) + } + + val (data, exc) = i2fResults.unzip + val dataPadded = data.init.map(d => Cat(data.last >> d.getWidth, d)) :+ data.last + mux.data := VecInit(dataPadded)(tag) + mux.exc := VecInit(exc)(tag) + } + + io.out <> Pipe(in.valid, mux, latency - 1) +} diff --git a/rocketv/src/fpu/MulAddRecFNPipe.scala b/rocketv/src/fpu/MulAddRecFNPipe.scala new file mode 100644 index 000000000..48dc9e4b9 --- /dev/null +++ b/rocketv/src/fpu/MulAddRecFNPipe.scala @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.Pipe + +object 
MulAddRecFNPipeParameter { + implicit def rwP: upickle.default.ReadWriter[MulAddRecFNPipeParameter] = upickle.default.macroRW[MulAddRecFNPipeParameter] +} + +case class MulAddRecFNPipeParameter( + useAsyncReset: Boolean, + latency: Int, + expWidth: Int, + sigWidth: Int) + extends SerializableModuleParameter { + require(latency <= 2) +} + +class MulAddRecFNPipeInterface(parameter: MulAddRecFNPipeParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val validin = Input(Bool()) + val op = Input(UInt(2.W)) + val a = Input(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val b = Input(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val c = Input(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val roundingMode = Input(UInt(3.W)) + val detectTininess = Input(UInt(1.W)) + val out = Output(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val exceptionFlags = Output(UInt(5.W)) + val validout = Output(Bool()) +} + +@instantiable +class MulAddRecFNPipe(val parameter: MulAddRecFNPipeParameter) + extends FixedIORawModule(new MulAddRecFNPipeInterface(parameter)) + with SerializableModule[MulAddRecFNPipeParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val latency: Int = parameter.latency + val expWidth: Int = parameter.expWidth + val sigWidth: Int = parameter.sigWidth + //------------------------------------------------------------------------ + //------------------------------------------------------------------------ + + val mulAddRecFNToRaw_preMul = Module(new hardfloat.MulAddRecFNToRaw_preMul(expWidth, sigWidth)) + val mulAddRecFNToRaw_postMul = Module(new hardfloat.MulAddRecFNToRaw_postMul(expWidth, sigWidth)) + + mulAddRecFNToRaw_preMul.io.op := io.op + mulAddRecFNToRaw_preMul.io.a := io.a + mulAddRecFNToRaw_preMul.io.b := io.b + mulAddRecFNToRaw_preMul.io.c := io.c + + val mulAddResult = + (mulAddRecFNToRaw_preMul.io.mulAddA * + mulAddRecFNToRaw_preMul.io.mulAddB) +& + mulAddRecFNToRaw_preMul.io.mulAddC + + val valid_stage0 = Wire(Bool()) + val roundingMode_stage0 = Wire(UInt(3.W)) + val detectTininess_stage0 = Wire(UInt(1.W)) + + val postmul_regs = if (latency > 0) 1 else 0 + mulAddRecFNToRaw_postMul.io.fromPreMul := Pipe(io.validin, mulAddRecFNToRaw_preMul.io.toPostMul, postmul_regs).bits + mulAddRecFNToRaw_postMul.io.mulAddResult := Pipe(io.validin, mulAddResult, postmul_regs).bits + mulAddRecFNToRaw_postMul.io.roundingMode := Pipe(io.validin, io.roundingMode, postmul_regs).bits + roundingMode_stage0 := Pipe(io.validin, io.roundingMode, postmul_regs).bits + detectTininess_stage0 := Pipe(io.validin, io.detectTininess, postmul_regs).bits + valid_stage0 := Pipe(io.validin, false.B, postmul_regs).valid + + //------------------------------------------------------------------------ + //------------------------------------------------------------------------ + + val roundRawFNToRecFN = Module(new hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0)) + + val round_regs = if (latency == 2) 1 else 0 + roundRawFNToRecFN.io.invalidExc := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.invalidExc, round_regs).bits + roundRawFNToRecFN.io.in := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.rawOut, round_regs).bits + roundRawFNToRecFN.io.roundingMode := Pipe(valid_stage0, roundingMode_stage0, round_regs).bits + roundRawFNToRecFN.io.detectTininess := Pipe(valid_stage0, 
detectTininess_stage0, round_regs).bits + io.validout := Pipe(valid_stage0, false.B, round_regs).valid + + roundRawFNToRecFN.io.infiniteExc := false.B + + io.out := roundRawFNToRecFN.io.out + io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags +} diff --git a/script/emu/src/Main.scala b/script/emu/src/Main.scala index 4411b8cf1..c8bcec37f 100644 --- a/script/emu/src/Main.scala +++ b/script/emu/src/Main.scala @@ -62,7 +62,7 @@ object Main: "--no-link", "--print-out-paths", "--no-warn-dirty", - s".#t1.${config}.${caseAttrRoot}.${caseName}" + s".#t1.${config}.ip.${caseAttrRoot}.${caseName}" ) Logger.trace( s"Running `${nixArgs.mkString(" ")}` to get test case ELF file" @@ -80,7 +80,7 @@ object Main: ): os.Path = // FIXME: replace with actual trace emulator here val target = - if (isTrace) then s"${emuType}.difftest" else s"${emuType}.difftest" + if (isTrace) then s"${emuType}.verilator-emu" else s"${emuType}.verilator-emu" val nixArgs = Seq( "nix", "build", diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index aeb388ae8..59a7eb8a6 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -27,7 +27,7 @@ class LaneOM extends Class { vfus := vfusIn } -class LaneSlotProbe(instructionIndexBit: Int) extends Bundle { +class LaneSlotProbe(instructionIndexBits: Int) extends Bundle { val stage0EnqueueReady: Bool = Bool() val stage0EnqueueValid: Bool = Bool() val changingMaskSet: Bool = Bool() @@ -44,29 +44,26 @@ class LaneSlotProbe(instructionIndexBit: Int) extends Bundle { // write queue enq for lane val writeQueueEnq: Bool = Bool() - val writeTag: UInt = UInt(instructionIndexBit.W) + val writeTag: UInt = UInt(instructionIndexBits.W) val writeMask: UInt = UInt(4.W) } -class LaneWriteProbe(instructionIndexBit: Int) extends Bundle { - val writeTag: UInt = UInt(instructionIndexBit.W) +class LaneWriteProbe(instructionIndexBits: Int) extends Bundle { + val writeTag: UInt = UInt(instructionIndexBits.W) val writeMask: UInt = UInt(4.W) } -class LaneProbe(slotsSize: Int, instructionIndexBit: Int) extends Bundle { - val slots = Vec(slotsSize, new LaneSlotProbe(instructionIndexBit)) - // @todo @Clo91eaf remove valid here, add stall := valid & !ready - val laneRequestValid: Bool = Bool() - // @todo remove it. - val laneRequestReady: Bool = Bool() +class LaneProbe(parameter: LaneParameter) extends Bundle { + val slots = Vec(parameter.chainingSize, new LaneSlotProbe(parameter.instructionIndexBits)) + val laneRequestStall: Bool = Bool() // @todo @Clo91eaf change to occupied for each slot. 
val lastSlotOccupied: Bool = Bool() - // @todo replace it with VRFProbe - val vrfInstructionWriteReportReady: Bool = Bool() - val instructionFinished: UInt = UInt(slotsSize.W) - val instructionValid: UInt = UInt(slotsSize.W) + val instructionFinished: UInt = UInt(parameter.chainingSize.W) + val instructionValid: UInt = UInt(parameter.chainingSize.W) + + val crossWriteProbe: Vec[ValidIO[LaneWriteProbe]] = Vec(2, Valid(new LaneWriteProbe(parameter.instructionIndexBits))) - val crossWriteProbe: Vec[ValidIO[LaneWriteProbe]] = Vec(2, Valid(new LaneWriteProbe(instructionIndexBit))) + val vrfProbe: VRFProbe = new VRFProbe(parameter.vrfParam) } object LaneParameter { @@ -314,16 +311,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val vrfReadyToStore: Bool = IO(Output(Bool())) @public - val probe: LaneProbe = IO(Output(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits)))) - val probeWire: LaneProbe = Wire(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits)) - define(probe, ProbeValue(probeWire)) - @public - val vrfProbe = IO(Output(Probe(new VRFProbe( - parameter.vrfParam.regNumBits, - parameter.vrfOffsetBits, - parameter.instructionIndexBits, - parameter.datapathWidth - )))) + val laneProbe = IO(Output(Probe(new LaneProbe(parameter)))) + val probeWire = Wire(new LaneProbe(parameter)) + define(laneProbe, ProbeValue(probeWire)) @public val vrfAllocateIssue: Bool = IO(Output(Bool())) @@ -333,7 +323,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ /** VRF instantces. */ val vrf: Instance[VRF] = Instantiate(new VRF(parameter.vrfParam)) - define(vrfProbe, vrf.probe) /** TODO: review later */ @@ -1222,10 +1211,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex // probe wire - probeWire.laneRequestValid := laneRequest.valid - probeWire.laneRequestReady := laneRequest.ready + probeWire.laneRequestStall := laneRequest.valid && !laneRequest.ready probeWire.lastSlotOccupied := slotOccupied.last - probeWire.vrfInstructionWriteReportReady := vrf.instructionWriteReport.ready probeWire.instructionFinished := instructionFinished probeWire.instructionValid := instructionValid probeWire.crossWriteProbe.zip(writeBusPort).foreach {case (pb, port) => @@ -1233,4 +1220,5 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ pb.bits.writeTag := port.deq.bits.instructionIndex pb.bits.writeMask := port.deq.bits.mask } + probeWire.vrfProbe := probe.read(vrf.vrfProbe) } diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala new file mode 100644 index 000000000..a438f363c --- /dev/null +++ b/t1/src/LaneZvbb.scala @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl + +import chisel3.experimental.hierarchy.instantiable +import chisel3._ +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder} + +object LaneZvbbParam { + implicit def rw: upickle.default.ReadWriter[LaneZvbbParam] = upickle.default.macroRW +} + +case class LaneZvbbParam(datapathWidth: Int, latency: Int) extends VFUParameter with SerializableModuleParameter { + val inputBundle = new LaneZvbbRequest(datapathWidth) + val decodeField: BoolField = Decoder.zvbb + val outputBundle = new 
LaneZvbbResponse(datapathWidth) + override val NeedSplit: Boolean = false +} + +class LaneZvbbRequest(datapathWidth: Int) extends VFUPipeBundle { + val src = Vec(3, UInt(datapathWidth.W)) + val opcode = UInt(4.W) + val vSew = UInt(2.W) + val shifterSize = UInt(log2Ceil(datapathWidth).W) +} + +class LaneZvbbResponse(datapathWidth: Int) extends VFUPipeBundle { + val data = UInt(datapathWidth.W) +} + +@instantiable +class LaneZvbb(val parameter: LaneZvbbParam) + extends VFUModule(parameter) with SerializableModule[LaneZvbbParam]{ + val response: LaneZvbbResponse = Wire(new LaneZvbbResponse(parameter.datapathWidth)) + val request : LaneZvbbRequest = connectIO(response).asTypeOf(parameter.inputBundle) + + val zvbbSrc: UInt = request.src(1) // vs2 + val zvbbRs: UInt = request.src(0) // vs1 or rs1 + val vSew: UInt = UIntToOH(request.vSew) // sew = 0, 1, 2 + + val zvbbBRev = VecInit(zvbbSrc.asBools.reverse).asUInt // element's bit reverse + val zvbbBRev8 = VecInit(zvbbSrc.asBools.grouped(8).map(s => VecInit(s.reverse)).toSeq).asUInt // byte's bit reverse + val zvbbRev8 = VecInit(zvbbSrc.asBools.grouped(8).map(s => VecInit(s)).toSeq.reverse).asUInt // element's byte reverse + + val zvbbSrc16a = zvbbSrc(parameter.datapathWidth-1, parameter.datapathWidth-16) + val zvbbSrc16b = zvbbSrc(parameter.datapathWidth-17, parameter.datapathWidth-32) + val zvbbSrc8a = zvbbSrc(parameter.datapathWidth-1, parameter.datapathWidth-8) + val zvbbSrc8b = zvbbSrc(parameter.datapathWidth-9, parameter.datapathWidth-16) + val zvbbSrc8c = zvbbSrc(parameter.datapathWidth-17, parameter.datapathWidth-24) + val zvbbSrc8d = zvbbSrc(parameter.datapathWidth-25, parameter.datapathWidth-32) + + val zvbbRs16a = zvbbRs(parameter.datapathWidth-1, parameter.datapathWidth-16) + val zvbbRs16b = zvbbRs(parameter.datapathWidth-17, parameter.datapathWidth-32) + val zvbbRs8a = zvbbRs(parameter.datapathWidth-1, parameter.datapathWidth-8) + val zvbbRs8b = zvbbRs(parameter.datapathWidth-9, parameter.datapathWidth-16) + val zvbbRs8c = zvbbRs(parameter.datapathWidth-17, parameter.datapathWidth-24) + val zvbbRs8d = zvbbRs(parameter.datapathWidth-25, parameter.datapathWidth-32) + + val zero32: UInt = 0.U(32.W) + val zero16: UInt = 0.U(16.W) + val zero10: UInt = 0.U(11.W) + val zero8: UInt = 0.U(8.W) + val zero3: UInt = 0.U(4.W) + + val zvbbCLZ32: UInt = (32.U - PopCount(scanRightOr(zvbbSrc))).asUInt + val zvbbCLZ16: UInt = { + val clz16a: UInt = (16.U - PopCount(scanRightOr(zvbbSrc16a))).asUInt(4, 0) + val clz16b: UInt = (16.U - PopCount(scanRightOr(zvbbSrc16b))).asUInt(4, 0) + zero10 ## clz16a ## zero10 ## clz16b + } + val zvbbCLZ8: UInt = { + val clz8a: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8a))).asUInt(3, 0) + val clz8b: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8b))).asUInt(3, 0) + val clz8c: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8c))).asUInt(3, 0) + val clz8d: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8d))).asUInt(3, 0) + zero3 ## clz8a ## zero3 ## clz8b ## zero3 ## clz8c ## zero3 ## clz8d + } + val zvbbCLZ: UInt = Mux1H(vSew, Seq( + zvbbCLZ8, + zvbbCLZ16, + zvbbCLZ32, + )) + + val zvbbCTZ32 = (32.U - PopCount(scanLeftOr(zvbbSrc))).asUInt + val zvbbCTZ16: UInt = { + val ctz16a: UInt = (16.U - PopCount(scanLeftOr(zvbbSrc16a))).asUInt(4, 0) + val ctz16b: UInt = (16.U - PopCount(scanLeftOr(zvbbSrc16b))).asUInt(4, 0) + zero10 ## ctz16a ## zero10 ## ctz16b + } + val zvbbCTZ8: UInt = { + val ctz8a: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8a))).asUInt(3, 0) + val ctz8b: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8b))).asUInt(3, 0) + val ctz8c: 
UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8c))).asUInt(3, 0) + val ctz8d: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8d))).asUInt(3, 0) + zero3 ## ctz8a ## zero3 ## ctz8b ## zero3 ## ctz8c ## zero3 ## ctz8d + } + val zvbbCTZ = Mux1H(vSew, Seq( + zvbbCTZ8, + zvbbCTZ16, + zvbbCTZ32, + )) + + val zvbbROL32 = zvbbSrc.rotateLeft(zvbbRs(4, 0)).asUInt + val zvbbROL16: UInt = { + val rol16a = zvbbSrc16a.rotateLeft(zvbbRs16a(3, 0)).asUInt(15, 0) + val rol16b = zvbbSrc16b.rotateLeft(zvbbRs16b(3, 0)).asUInt(15, 0) + rol16a ## rol16b + } + val zvbbROL8: UInt = { + val rol8a = zvbbSrc8a.rotateLeft(zvbbRs8a(2, 0)).asUInt(7, 0) + val rol8b = zvbbSrc8b.rotateLeft(zvbbRs8b(2, 0)).asUInt(7, 0) + val rol8c = zvbbSrc8c.rotateLeft(zvbbRs8c(2, 0)).asUInt(7, 0) + val rol8d = zvbbSrc8d.rotateLeft(zvbbRs8d(2, 0)).asUInt(7, 0) + rol8a ## rol8b ## rol8c ## rol8d + } + val zvbbROL = Mux1H(vSew, Seq( + zvbbROL8, + zvbbROL16, + zvbbROL32, + )) + + val zvbbROR32 = zvbbSrc.rotateRight(zvbbRs(4, 0)).asUInt + val zvbbROR16: UInt = { + val ror16a = zvbbSrc16a.rotateRight(zvbbRs16a(3, 0)).asUInt(15, 0) + val ror16b = zvbbSrc16b.rotateRight(zvbbRs16b(3, 0)).asUInt(15, 0) + ror16a ## ror16b + } + val zvbbROR8: UInt = { + val ror8a = zvbbSrc8a.rotateRight(zvbbRs8a(2, 0)).asUInt(7, 0) + val ror8b = zvbbSrc8b.rotateRight(zvbbRs8b(2, 0)).asUInt(7, 0) + val ror8c = zvbbSrc8c.rotateRight(zvbbRs8c(2, 0)).asUInt(7, 0) + val ror8d = zvbbSrc8d.rotateRight(zvbbRs8d(2, 0)).asUInt(7, 0) + ror8a ## ror8b ## ror8c ## ror8d + } + val zvbbROR = Mux1H(vSew, Seq( + zvbbROR8, + zvbbROR16, + zvbbROR32, + )) + + val zvbbSLL64_32 = ((zero32 ## zvbbSrc).asUInt << zvbbRs(4, 0)).asUInt(31, 0) + val zvbbSLL64_16: UInt = { + val sll64_16a = ((zero16 ## zvbbSrc16a).asUInt << zvbbRs16a(3, 0)).asUInt(15, 0) + val sll64_16b = ((zero16 ## zvbbSrc16b).asUInt << zvbbRs16b(3, 0)).asUInt(15, 0) + sll64_16a ## sll64_16b + } + val zvbbSLL64_8: UInt = { + val sll64_8a = ((zero8 ## zvbbSrc8a).asUInt << zvbbRs8a(2, 0)).asUInt(7, 0) + val sll64_8b = ((zero8 ## zvbbSrc8b).asUInt << zvbbRs8b(2, 0)).asUInt(7, 0) + val sll64_8c = ((zero8 ## zvbbSrc8c).asUInt << zvbbRs8c(2, 0)).asUInt(7, 0) + val sll64_8d = ((zero8 ## zvbbSrc8d).asUInt << zvbbRs8d(2, 0)).asUInt(7, 0) + sll64_8a ## sll64_8b ## sll64_8c ## sll64_8d + } + val zvbbSLL64 = Mux1H(vSew, Seq( + zvbbSLL64_8, + zvbbSLL64_16, + zvbbSLL64_32, + )) + val zvbbSLL = zvbbSLL64(parameter.datapathWidth-1, 0) + + val zvbbANDN = zvbbSrc & (~zvbbRs) + + response.data := Mux1H(UIntToOH(request.opcode), Seq( + zvbbBRev, + zvbbBRev8, + zvbbRev8, + zvbbCLZ, + zvbbCTZ, + zvbbROL, + zvbbROR, + zvbbSLL, + zvbbANDN, + )) +} + diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 4dca51f4e..676ab423a 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -118,7 +118,12 @@ case class T1Parameter( val allInstructions: Seq[Instruction] = { org.chipsalliance.rvdecoderdb.instructions(org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)) - .filter(instruction => instruction.instructionSet.name == "rv_v")++ + .filter{ + instruction => instruction.instructionSet.name match { + case "rv_v" => true + case "rv_zvbb" => if (zvbbEnable) true else false + case _ => false + }} ++ t1customInstructions.map(_.instruction) }.toSeq.sortBy(_.instructionSet.name).filter{ insn => insn.name match { @@ -127,7 +132,7 @@ case class T1Parameter( } } - require(extensions.forall(Seq("Zve32x", "Zve32f").contains), "unsupported extension.") + require(extensions.forall(Seq("Zve32x", "Zve32f", "Zvbb").contains), "unsupported extension.") // TODO: require 
bank not overlap /** xLen of T1, we currently only support 32. */ val xLen: Int = 32 @@ -144,6 +149,9 @@ case class T1Parameter( /** does t1 has floating datapath? */ val fpuEnable: Boolean = extensions.contains("Zve32f") + /** support of zvbb */ + lazy val zvbbEnable: Boolean = extensions.contains("Zvbb") + /** how many chaining does T1 support, this is not a parameter yet. */ val chainingSize: Int = 4 @@ -217,7 +225,7 @@ case class T1Parameter( // and the values are their respective delays. val crossLaneConnectCycles: Seq[Seq[Int]] = Seq.tabulate(laneNumber)(_ => Seq(1, 1)) - val decoderParam: DecoderParam = DecoderParam(fpuEnable, allInstructions) + val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, allInstructions) /** paraemter for AXI4. */ val axi4BundleParameter: AXI4BundleParameter = AXI4BundleParameter( @@ -284,18 +292,23 @@ case class T1Parameter( def adderParam: LaneAdderParam = LaneAdderParam(datapathWidth, 0) } -class T1Probe(param: T1Parameter) extends Bundle { - val instructionCounter: UInt = UInt(param.instructionIndexBits.W) +class T1Probe(parameter: T1Parameter) extends Bundle { + val instructionCounter: UInt = UInt(parameter.instructionIndexBits.W) val instructionIssue: Bool = Bool() - val issueTag: UInt = UInt(param.instructionIndexBits.W) + val issueTag: UInt = UInt(parameter.instructionIndexBits.W) val retireValid: Bool = Bool() // write queue enq for mask unit - val writeQueueEnq: ValidIO[UInt] = Valid(UInt(param.instructionIndexBits.W)) - val writeQueueEnqMask: UInt = UInt((param.datapathWidth / 8).W) + val writeQueueEnq: ValidIO[UInt] = Valid(UInt(parameter.instructionIndexBits.W)) + val writeQueueEnqMask: UInt = UInt((parameter.datapathWidth / 8).W) // mask unit instruction valid - val instructionValid: UInt = UInt((param.chainingSize * 2).W) + val instructionValid: UInt = UInt((parameter.chainingSize * 2).W) // instruction index for check rd - val responseCounter: UInt = UInt(param.instructionIndexBits.W) + val responseCounter: UInt = UInt(parameter.instructionIndexBits.W) + // probes + val lsuProbe: LSUProbe = new LSUProbe(parameter.lsuParameters) + val laneProbes: Vec[LaneProbe] = Vec(parameter.laneNumber, new LaneProbe(parameter.laneParam)) + val issue: ValidIO[UInt] = Valid(UInt(parameter.instructionIndexBits.W)) + val retire: ValidIO[UInt] = Valid(UInt(parameter.xLen.W)) } class T1Interface(parameter: T1Parameter) extends Record { @@ -306,11 +319,7 @@ class T1Interface(parameter: T1Parameter) extends Record { def highBandwidthLoadStorePort: AXI4RWIrrevocable = elements("highBandwidthLoadStorePort").asInstanceOf[AXI4RWIrrevocable] def indexedLoadStorePort: AXI4RWIrrevocable = elements("indexedLoadStorePort").asInstanceOf[AXI4RWIrrevocable] def om: Property[ClassType] = elements("om").asInstanceOf[Property[ClassType]] - // TODO: refactor to an single Probe to avoid using Record on the [[T1Interface]]. 
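Review note: this hunk collapses the per-lane `lane${i}Probe` / `lane${i}VrfProbe` Record entries and the separate `lsuProbe` port into the single `T1Probe` bundle that is filled via `probe.read`, which is what the removed TODO was asking for. A minimal stand-alone sketch of that aggregation pattern, assuming nothing about T1's actual bundles (all module and field names here are hypothetical):

```scala
import chisel3._
import chisel3.probe.{define, read, Probe, ProbeValue}

class ChildProbe extends Bundle { val busy = Bool() }

class Child extends Module {
  val probeOut = IO(Output(Probe(new ChildProbe)))
  val w = Wire(new ChildProbe)
  w.busy := true.B // stand-in payload
  define(probeOut, ProbeValue(w))
}

class TopProbe(n: Int) extends Bundle { val children = Vec(n, new ChildProbe) }

class Top(n: Int) extends Module {
  val probeOut = IO(Output(Probe(new TopProbe(n))))
  val childSeq = Seq.fill(n)(Module(new Child))
  val w = Wire(new TopProbe(n))
  // Read each child's probe into one wire, then export a single top-level Probe.
  w.children.zip(childSeq).foreach { case (p, c) => p := read(c.probeOut) }
  define(probeOut, ProbeValue(w))
}
```

Exporting one `Probe` keeps the interface a plain Bundle instead of a Record keyed by strings, so downstream tooling only has to know a single probe port.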
- def lsuProbe: LSUProbe = elements("lsuProbe").asInstanceOf[LSUProbe] def t1Probe: T1Probe = elements("t1Probe").asInstanceOf[T1Probe] - def laneProbes: Seq[LaneProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}Probe").asInstanceOf[LaneProbe]) - def laneVrfProbes: Seq[VRFProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}VrfProbe").asInstanceOf[VRFProbe]) val elements: SeqMap[String, Data] = SeqMap.from( Seq( "clock" -> Input(Clock()), @@ -320,15 +329,8 @@ class T1Interface(parameter: T1Parameter) extends Record { "highBandwidthLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter), "indexedLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32)), "om" -> Output(Property[AnyClassType]()), - "lsuProbe" -> Output(Probe(new LSUProbe(parameter.lsuParameters))), "t1Probe" -> Output(Probe(new T1Probe(parameter))), - ) ++ - Seq.tabulate(parameter.laneNumber)( - i => s"lane${i}Probe" -> Output(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits))) - ) ++ - Seq.tabulate(parameter.laneNumber)( - i => s"lane${i}VrfProbe" -> Output(Probe(new VRFProbe(parameter.laneParam.vrfParam.regNumBits, parameter.laneParam.vrfOffsetBits, parameter.laneParam.instructionIndexBits, parameter.laneParam.datapathWidth))) - ) + ) ) } @@ -1575,15 +1577,7 @@ class T1(val parameter: T1Parameter) lane } - laneVec.zipWithIndex.foreach { case (lane, index) => - define(io.laneProbes(index), lane.probe) - define(io.laneVrfProbes(index), lane.vrfProbe) - } - omInstance.lanesIn := Property(laneVec.map(_.om.asAnyClassType)) - - define(io.lsuProbe, lsu._probe) - dataInWritePipeVec := VecInit(laneVec.map(_.writeQueueValid)) // 连lsu @@ -1737,7 +1731,12 @@ class T1(val parameter: T1Parameter) !slots.last.state.sMaskUnitExecution && !slots.last.state.idle, indexToOH(slots.last.record.instructionIndex, parameter.chainingSize * 2)).asUInt probeWire.responseCounter := responseCounter - + probeWire.laneProbes.zip(laneVec).foreach { case (p, l) => p := probe.read(l.laneProbe) } + probeWire.lsuProbe := probe.read(lsu.lsuProbe) + probeWire.issue.valid := io.issue.fire + probeWire.issue.bits := instructionCounter + probeWire.retire.valid := io.retire.rd.valid + probeWire.retire.bits := io.retire.rd.bits.rdData // new V Request from core // val requestValidProbe: Bool = IO(Output(Probe(Bool()))) diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index 25ff98a49..cf06a66af 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -105,7 +105,8 @@ case class VFUInstantiateParameter( divModuleParameters: Seq[(SerializableModuleGenerator[LaneDiv, LaneDivParam], Seq[Int])], divfpModuleParameters: Seq[(SerializableModuleGenerator[LaneDivFP, LaneDivFPParam], Seq[Int])], otherModuleParameters: Seq[(SerializableModuleGenerator[OtherUnit, OtherUnitParam], Seq[Int])], - floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])] + floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])], + zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneZvbb, LaneZvbbParam], Seq[Int])] ) { val genVec: Seq[(SerializableModuleGenerator[_ <: VFUModule, _ <: VFUParameter], Seq[Int])] = logicModuleParameters ++ @@ -115,7 +116,8 @@ case class VFUInstantiateParameter( divModuleParameters ++ divfpModuleParameters ++ otherModuleParameters ++ - floatModuleParameters + floatModuleParameters ++ + zvbbModuleParameters genVec.foreach { case 
(_, connect) => connect.foreach(connectIndex => require(connectIndex < slotCount)) diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 3ebe07df3..3a0299389 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -13,7 +13,7 @@ import org.chipsalliance.t1.rtl.decoder.attribute._ object DecoderParam { implicit def rwP: upickle.default.ReadWriter[DecoderParam] = upickle.default.macroRW } -case class DecoderParam(fpuEnable: Boolean, allInstructions: Seq[Instruction]) +case class DecoderParam(fpuEnable: Boolean, zvbbEnable: Boolean, allInstructions: Seq[Instruction]) trait T1DecodeFiled[D <: Data] extends DecodeField[T1DecodePattern, D] with FieldName @@ -221,6 +221,10 @@ object Decoder { override def getTriState(pattern: T1DecodePattern): TriState = pattern.isOrderreduce.value } + object zvbb extends BoolField { + override def getTriState(pattern: T1DecodePattern): TriState = pattern.isZvbb.value + } + object topUop extends T1TopUopField { override def genTable(pattern: T1DecodePattern): BitPat = pattern.topUop.value match { case _: TopT0.type => BitPat("b000") @@ -328,6 +332,19 @@ object Decoder { case _: zeroUop0.type => BitPat("b0000") case _ => BitPat.dontCare(4) } + case zvbbCase: ZvbbUOPType => + zvbbCase match { + case _: zvbbUop0.type => BitPat("b0000") // brev + case _: zvbbUop1.type => BitPat("b0001") // brev8 + case _: zvbbUop2.type => BitPat("b0010") // rev8 + case _: zvbbUop3.type => BitPat("b0011") // clz + case _: zvbbUop4.type => BitPat("b0100") // ctz + case _: zvbbUop5.type => BitPat("b0101") // rol + case _: zvbbUop6.type => BitPat("b0110") // ror + case _: zvbbUop7.type => BitPat("b0111") // wsll + case _: zvbbUop8.type => BitPat("b1000") // andn + case _ => BitPat.dontCare(4) + } case _ => BitPat.dontCare(4) } } @@ -399,6 +416,12 @@ object Decoder { orderReduce ) else Seq() + } ++ { + if (param.zvbbEnable) + Seq( + zvbb, + ) + else Seq() } def allDecodePattern(param: DecoderParam): Seq[T1DecodePattern] = param.allInstructions.map(T1DecodePattern(_, param)).toSeq.sortBy(_.instruction.name) diff --git a/t1/src/decoder/InstructionDocumentation.scala b/t1/src/decoder/InstructionDocumentation.scala index 22cf95823..86c5a7e35 100644 --- a/t1/src/decoder/InstructionDocumentation.scala +++ b/t1/src/decoder/InstructionDocumentation.scala @@ -422,5 +422,22 @@ case class InstructionDocumentation(instruction: Instruction, param: DecoderPara case "vzext.vf2" => "TODO!" case "vzext.vf4" => "TODO!" case "vzext.vf8" => "TODO!" + // rv_zvbb + case "vandn.vv" => "TODO!" + case "vandn.vx" => "TODO!" + case "vbrev.v" => "TODO!" + case "vbrev8.v" => "TODO!" + case "vrev8.v" => "TODO!" + case "vclz.v" => "TODO!" + case "vctz.v" => "TODO!" + case "vcpop.v" => "TODO!" + case "vrol.vv" => "TODO!" + case "vrol.vx" => "TODO!" + case "vror.vv" => "TODO!" + case "vror.vx" => "TODO!" + case "vror.vi" => "TODO!" + case "vwsll.vv" => "TODO!" + case "vwsll.vx" => "TODO!" + case "vwsll.vi" => "TODO!" 
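Review note: the 4-bit encodings assigned to `zvbbUop0`..`zvbbUop8` above are positional. `LaneZvbb` turns `request.opcode` into a one-hot with `UIntToOH` and picks the matching result with `Mux1H`, so the BitPat values must follow the order of the response `Seq` (brev, brev8, rev8, clz, ctz, rol, ror, wsll, andn). A small self-contained sketch of that selection idiom, with a toy four-entry subset and stand-in result values:

```scala
import chisel3._
import chisel3.util.{Mux1H, UIntToOH}

class ZvbbSelect extends Module {
  val opcode = IO(Input(UInt(4.W))) // 0 = brev, 1 = brev8, 2 = rev8, 3 = clz
  val result = IO(Output(UInt(32.W)))
  val brev  = Wire(UInt(32.W))
  val brev8 = Wire(UInt(32.W))
  val rev8  = Wire(UInt(32.W))
  val clz   = Wire(UInt(32.W))
  Seq(brev, brev8, rev8, clz).zipWithIndex.foreach { case (w, i) => w := i.U }
  // Entry i of the Seq is selected exactly when opcode === i.U, so this
  // ordering must stay in lockstep with the decoder's uop encodings.
  result := Mux1H(UIntToOH(opcode, 4), Seq(brev, brev8, rev8, clz))
}
```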
} } diff --git a/t1/src/decoder/T1DecodePattern.scala b/t1/src/decoder/T1DecodePattern.scala index d1bb84930..5c7d10733 100644 --- a/t1/src/decoder/T1DecodePattern.scala +++ b/t1/src/decoder/T1DecodePattern.scala @@ -107,6 +107,7 @@ case class T1DecodePattern(instruction: Instruction, param: DecoderParam) extend def isVtype: isVtype = attribute.isVtype(this) def isVwmacc: isVwmacc = attribute.isVwmacc(this) def isWidenreduce: isWidenreduce = attribute.isWidenreduce(this) + def isZvbb: isZvbb = attribute.isZvbb(this) def fpExecutionType: FpExecutionType.Type = attribute.FpExecutionType(this) def topUop: TopUop = attribute.TopUop(this) def decoderUop: DecoderUop = attribute.DecoderUop(this) diff --git a/t1/src/decoder/attribute/isCrosswrite.scala b/t1/src/decoder/attribute/isCrosswrite.scala index cbe920dbb..bddbc3818 100644 --- a/t1/src/decoder/attribute/isCrosswrite.scala +++ b/t1/src/decoder/attribute/isCrosswrite.scala @@ -46,6 +46,10 @@ object isCrosswrite { "vwsubu.vx", "vwsubu.wv", "vwsubu.wx", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isItype.scala b/t1/src/decoder/attribute/isItype.scala index aafc0641c..5ba9baf2e 100644 --- a/t1/src/decoder/attribute/isItype.scala +++ b/t1/src/decoder/attribute/isItype.scala @@ -51,6 +51,9 @@ object isItype { "vssra.vi", "vssrl.vi", "vxor.vi", + // rv_zvbb + "vror.vi", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isPopcount.scala b/t1/src/decoder/attribute/isPopcount.scala index 0137b77b0..3a949c436 100644 --- a/t1/src/decoder/attribute/isPopcount.scala +++ b/t1/src/decoder/attribute/isPopcount.scala @@ -18,6 +18,7 @@ object isPopcount { def y(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched = Seq( "vcpop.m", + "vcpop.v", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index 229c45575..423b59a35 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -274,5 +274,5 @@ object isScheduler { } case class isScheduler(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. */ " + override val description: String = "lane will send a request to the Sequencer and wait for its ack. Instructions that will communicate with the T1 top module." } diff --git a/t1/src/decoder/attribute/isSreadvd.scala b/t1/src/decoder/attribute/isSreadvd.scala index bf9fc6837..e6fa9bb76 100644 --- a/t1/src/decoder/attribute/isSreadvd.scala +++ b/t1/src/decoder/attribute/isSreadvd.scala @@ -307,5 +307,5 @@ object isSreadvd { } case class isSreadvd(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sReadVD -> !(ma || maskLogic) instruction need to read vd as operator. " + override val description: String = "sReadVD -> !(ma || maskLogic): instructions that need to read vd as the operator.
" } diff --git a/t1/src/decoder/attribute/isSwrite.scala b/t1/src/decoder/attribute/isSwrite.scala index cfddf2e04..f16f28e5d 100644 --- a/t1/src/decoder/attribute/isSwrite.scala +++ b/t1/src/decoder/attribute/isSwrite.scala @@ -210,6 +210,10 @@ object isSwrite { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } @@ -224,5 +228,5 @@ object isSwrite { } case class isSwrite(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore instruction will write vd or rd(scalar) from outside of lane. It will request vrf wait, and lane will not write. " + override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore: the instruction writes vd or rd(scalar) from outside of the lane. It requests a vrf wait, and the lane itself never writes vd while isSwrite is true." } diff --git a/t1/src/decoder/attribute/isUnsigned0.scala b/t1/src/decoder/attribute/isUnsigned0.scala index c180180bd..fb041c3c7 100644 --- a/t1/src/decoder/attribute/isUnsigned0.scala +++ b/t1/src/decoder/attribute/isUnsigned0.scala @@ -130,6 +130,22 @@ object isUnsigned0 { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isUnsigned1.scala b/t1/src/decoder/attribute/isUnsigned1.scala index 1f71f2310..cf4f517a0 100644 --- a/t1/src/decoder/attribute/isUnsigned1.scala +++ b/t1/src/decoder/attribute/isUnsigned1.scala @@ -102,6 +102,22 @@ object isUnsigned1 { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index 605588b08..7649d715a 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -181,6 +181,11 @@ object isVtype { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vrol.vv", + "vror.vv", + "vwsll.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isZvbb.scala b/t1/src/decoder/attribute/isZvbb.scala new file mode 100644 index 000000000..c5735aaf9 --- /dev/null +++ b/t1/src/decoder/attribute/isZvbb.scala @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +object isZvbb { + def apply(t1DecodePattern: T1DecodePattern): isZvbb = + Seq( + y _ -> Y, + n _ -> N, + dc _ -> DC + ).collectFirst { + case (fn, tri) if fn(t1DecodePattern) => isZvbb(tri) + }.get + + def y(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", +
"vwsll.vi", + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + def n(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = t1DecodePattern.param.allInstructions.filter(i => + !(y(t1DecodePattern) || dc(t1DecodePattern)) + ) + allMatched.contains(t1DecodePattern.instruction) + } + + def dc(t1DecodePattern: T1DecodePattern): Boolean = false +} + +case class isZvbb(value: TriState) extends BooleanDecodeAttribute { + override val description: String = "goes to [[org.chipsalliance.t1.rtl.LaneZvbb]]." +} diff --git a/t1/src/decoder/attribute/uop.scala b/t1/src/decoder/attribute/uop.scala index 66d8dbf02..97d49365c 100644 --- a/t1/src/decoder/attribute/uop.scala +++ b/t1/src/decoder/attribute/uop.scala @@ -15,7 +15,8 @@ object DecoderUop { isLogic.y(t1DecodePattern) -> LogicUop(t1DecodePattern), isShift.y(t1DecodePattern) -> ShiftUop(t1DecodePattern), isOther.y(t1DecodePattern) -> OtherUop(t1DecodePattern), - isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern) + isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern), + isZvbb.y(t1DecodePattern) -> ZvbbUOP(t1DecodePattern), ).collectFirst { case (fn, tpe) if fn => DecoderUop(tpe) } diff --git a/t1/src/decoder/attribute/zvbbUop.scala b/t1/src/decoder/attribute/zvbbUop.scala new file mode 100644 index 000000000..06524e95b --- /dev/null +++ b/t1/src/decoder/attribute/zvbbUop.scala @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +trait ZvbbUOPType extends Uop +object zvbbUop0 extends ZvbbUOPType // brev +object zvbbUop1 extends ZvbbUOPType // brev8 +object zvbbUop2 extends ZvbbUOPType // rev8 +object zvbbUop3 extends ZvbbUOPType // clz +object zvbbUop4 extends ZvbbUOPType // ctz +object zvbbUop5 extends ZvbbUOPType // rol +object zvbbUop6 extends ZvbbUOPType // ror +object zvbbUop7 extends ZvbbUOPType // wsll +object zvbbUop8 extends ZvbbUOPType // andn + +object ZvbbUOP { + def apply(t1DecodePattern: T1DecodePattern): Uop = { + Seq( + t0 _ -> zvbbUop0, + t1 _ -> zvbbUop1, + t2 _ -> zvbbUop2, + t3 _ -> zvbbUop3, + t4 _ -> zvbbUop4, + t5 _ -> zvbbUop5, + t6 _ -> zvbbUop6, + t7 _ -> zvbbUop7, + t8 _ -> zvbbUop8, + ).collectFirst { + case (fn, tpe) if fn(t1DecodePattern) => tpe + }.getOrElse(UopDC) + } + def t0(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t1(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t2(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t3(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vclz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t4(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vctz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t5(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrol.vv", + "vrol.vx", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t6(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vror.vv", + "vror.vx", + "vror.vi", + ) + 
allMatched.contains(t1DecodePattern.instruction.name) + } + def t7(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t8(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vandn.vv", + "vandn.vx", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } +} diff --git a/t1/src/lsu/LSU.scala b/t1/src/lsu/LSU.scala index a938973dc..9cb3b7d58 100644 --- a/t1/src/lsu/LSU.scala +++ b/t1/src/lsu/LSU.scala @@ -246,9 +246,9 @@ class LSU(param: LSUParameter) extends Module { ) @public - val _probe = IO(Output(Probe(new LSUProbe(param)))) + val lsuProbe = IO(Output(Probe(new LSUProbe(param)))) val probeWire = Wire(new LSUProbe(param)) - define(_probe, ProbeValue(probeWire)) + define(lsuProbe, ProbeValue(probeWire)) // read vrf val otherTryReadVrf: UInt = Mux(otherUnit.vrfReadDataPorts.valid, otherUnit.status.targetLane, 0.U) diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 179a32107..724ef6372 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -106,13 +106,13 @@ case class VRFParam( val vrfReadLatency = 2 } -class VRFProbe(regNumBits: Int, offsetBits: Int, instructionIndexSize: Int, dataPathWidth: Int) extends Bundle { +class VRFProbe(parameter: VRFParam) extends Bundle { val valid: Bool = Bool() - val requestVd: UInt = UInt(regNumBits.W) - val requestOffset: UInt = UInt(offsetBits.W) - val requestMask: UInt = UInt((dataPathWidth / 8).W) - val requestData: UInt = UInt(dataPathWidth.W) - val requestInstruction: UInt = UInt(instructionIndexSize.W) + val requestVd: UInt = UInt(parameter.regNumBits.W) + val requestOffset: UInt = UInt(parameter.vrfOffsetBits.W) + val requestMask: UInt = UInt((parameter.datapathWidth / 8).W) + val requestData: UInt = UInt(parameter.datapathWidth.W) + val requestInstruction: UInt = UInt(parameter.instructionIndexBits.W) } /** Vector Register File. 
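Review note: `isZvbb` and `ZvbbUOP` above follow the repo-wide attribute style, a sequence of predicates tried in order with `collectFirst` taking the first match. A stand-alone plain-Scala sketch of that "first predicate wins" dispatch (object and case names are hypothetical):

```scala
object UopLookup {
  sealed trait Uop
  case object BRev extends Uop
  case object Ror  extends Uop
  case object DC   extends Uop

  private val table: Seq[(String => Boolean, Uop)] = Seq(
    ((n: String) => n == "vbrev.v", BRev),
    ((n: String) => Set("vror.vv", "vror.vx", "vror.vi").contains(n), Ror)
  )

  // collectFirst stops at the first predicate that matches, so earlier
  // entries take priority; anything unmatched falls back to DC.
  def apply(name: String): Uop =
    table.collectFirst { case (p, u) if p(name) => u }.getOrElse(DC)
}

// UopLookup("vror.vx") == Ror; UopLookup("vadd.vv") == DC
```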
@@ -564,9 +564,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar * Probe */ @public - val probe = IO(Output(Probe(new VRFProbe(parameter.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits, parameter.datapathWidth)))) - val probeWire = Wire(new VRFProbe(parameter.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits, parameter.datapathWidth)) - define(probe, ProbeValue(probeWire)) + val vrfProbe = IO(Output(Probe(new VRFProbe(parameter)))) + val probeWire = Wire(new VRFProbe(parameter)) + define(vrfProbe, ProbeValue(probeWire)) probeWire.valid := writePipe.valid probeWire.requestVd := writePipe.bits.vd diff --git a/t1rocket/src/T1RocketTile.scala b/t1rocket/src/T1RocketTile.scala new file mode 100644 index 000000000..9699eea05 --- /dev/null +++ b/t1rocket/src/T1RocketTile.scala @@ -0,0 +1,562 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.tile + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate} +import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter} +import chisel3.util.experimental.BitSet +import chisel3.util.log2Ceil +import chisel3.probe.{Probe, ProbeValue, define} +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} +import org.chipsalliance.rocketv.{BHTParameter, FPU, FPUParameter, Frontend, FrontendParameter, HellaCache, HellaCacheArbiter, HellaCacheArbiterParameter, HellaCacheParameter, PTW, PTWParameter, Rocket, RocketParameter, RocketTileParameter, RocketProbe} +import org.chipsalliance.rvdecoderdb.Instruction +import org.chipsalliance.t1.rtl.decoder.T1CustomInstruction +import org.chipsalliance.t1.rtl.vrf.RamType +import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw} +import org.chipsalliance.t1.rtl.lsu.LSUProbe +import org.chipsalliance.t1.rtl.vrf.VRFProbe +import org.chipsalliance.t1.rtl.{LaneAdder, LaneAdderParam, LaneDiv, LaneDivFP, LaneDivFPParam, LaneDivParam, LaneFloat, LaneFloatParam, LaneMul, LaneMulParam, LaneShifter, LaneShifterParameter, LogicParam, MaskedLogic, OtherUnit, OtherUnitParam, T1, T1Parameter, VFUInstantiateParameter, T1Probe, LaneProbe} + +object T1RocketTileParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit val vrfRamTypeP: upickle.default.ReadWriter[RamType] = upickle.default.ReadWriter.merge( + upickle.default.macroRW[p0rw.type], + upickle.default.macroRW[p0rp1w.type], + upickle.default.macroRW[p0rwp1rw.type] + ) + + implicit def rwP: upickle.default.ReadWriter[T1RocketTileParameter] = upickle.default.macroRW[T1RocketTileParameter] +} + +case class T1RocketTileParameter( + instructionSets: Seq[String], + cacheBlockBytes: Int, + nPMPs: Int, + cacheable: BitSet, + sideEffects: BitSet, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + dLen: Int, + vrfBankSize: Int, + vrfRamType: RamType) + extends SerializableModuleParameter { + require(instructionSets.count(Seq("Zve32x", "Zve32f").contains) == 1, "exactly one of Zve32x or Zve32f must be enabled") + + val useAsyncReset: Boolean = false + val clockGate: Boolean = false + + val paddrBits: Int = xLen + // TODO: add S in the future + val priv:
String = "m" + val hartIdLen: Int = 1 + val useBPWatch: Boolean = false + val mcontextWidth: Int = 0 + val scontextWidth: Int = 0 + val asidBits: Int = 0 + val resetVectorBits: Int = paddrBits + val nBreakpoints: Int = 0 + // TODO: set to 0 + val dtlbNSets: Int = 1 + val dtlbNWays: Int = 32 + val itlbNSets: Int = 1 + val itlbNWays: Int = 32 + val itlbNSectors: Int = 4 + val itlbNSuperpageEntries: Int = 4 + val nPTECacheEntries: Int = 9 + val nL2TLBWays: Int = 1 + val nL2TLBEntries: Int = 0 + // T1 doesn't check exceptions. + val legal: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val read: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val write: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val putPartial: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val logic: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val arithmetic: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val exec: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val btbEntries: Int = 28 + val btbNMatchBits: Int = 14 + val btbUpdatesOutOfOrder: Boolean = false + val nPages: Int = 6 + val nRAS: Int = 6 + val bhtParameter: Option[BHTParameter] = Some(BHTParameter(nEntries = 512, counterLength = 1, historyLength = 8, historyBits = 3)) + // TODO: remove it + val mulDivLatency: Int = 0 + val divUnroll: Int = 1 + val divEarlyOut: Boolean = false + val divEarlyOutGranularity: Int = 1 + val mulUnroll: Int = 1 + val mulEarlyOut: Boolean = false + val sfmaLatency: Int = 3 + val dfmaLatency: Int = 4 + val divSqrt: Boolean = true + // TODO: check decoder + val flushOnFenceI: Boolean = true + val fastLoadByte: Boolean = false + val fastLoadWord: Boolean = true + val maxUncachedInFlight: Int = 1 + val separateUncachedResp: Boolean = false + + + // calculate + def usingUser: Boolean = priv.contains("u") + + def usingSupervisor: Boolean = priv.contains("s") + + def vLen: Int = instructionSets.collectFirst { + case s"zvl${vlen}b" => vlen.toInt + }.get + + // static for now + def hasBeu: Boolean = false + + def usingNMI: Boolean = false + + def usingHypervisor: Boolean = false + + def usingDataScratchpad: Boolean = false + + def nLocalInterrupts: Int = 0 + + def dcacheArbPorts: Int = 2 + + def tagECC: Option[String] = None + + def dataECC: Option[String] = None + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def instructions: Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // Four mandatory instruction sets.
+ Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) + + private def hasInstructionSet(setName: String): Boolean = + instructions.flatMap(_.instructionSets.map(_.name)).contains(setName) + + def usingBTB: Boolean = btbEntries > 0 + + def xLen: Int = + (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match { + case (true, true) => throw new Exception("cannot support both rv32 and rv64 together") + case (true, false) => 32 + case (false, true) => 64 + case (false, false) => throw new Exception("no basic instruction found.") + } + + def fLen: Option[Int] = + ( + hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"), + hasInstructionSet("rv_d") || hasInstructionSet("rv64_d") + ) match { + case (false, false) => None + case (true, false) => Some(32) + case (false, true) => Some(64) + case (true, true) => Some(64) + } + + def usingVM = hasInstructionSet("sfence.vma") + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a") + + def usingCompressed = hasInstructionSet("rv_c") + + def minFLen: Option[Int] = + if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh")) + Some(16) + else + fLen + + def rocketParameter: RocketParameter = RocketParameter( + useAsyncReset, + clockGate, + instructionSets.toSet, + vLen, + usingUser, + hartIdLen, + nPMPs, + asidBits, + nBreakpoints, + usingBTB, + useBPWatch, + mcontextWidth, + scontextWidth, + mulDivLatency, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut, + paddrBits, + cacheBlockBytes, + hasBeu, + fastLoadByte, + fastLoadWord, + dcacheNSets, + flushOnFenceI, + usingT1 = true + ) + + def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + usingVM: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNWays: Int, + dcacheNSets: Int, + dcacheRowBits: Int, + dtlbNSets: Int, + dtlbNWays: Int, + tagECC: Option[String], + dataECC: Option[String], + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) + + def hellaCacheArbiterParameter: HellaCacheArbiterParameter = HellaCacheArbiterParameter( + useAsyncReset: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNSets: Int, + usingVM: Boolean, + separateUncachedResp: Boolean + ) + + def ptwParameter: PTWParameter = PTWParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + asidBits: Int, + pgLevels: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + nPMPs: Int + ) + + def frontendParameter: FrontendParameter = FrontendParameter( + useAsyncReset = useAsyncReset: Boolean, + clockGate = clockGate: Boolean, + xLen = xLen: Int, + usingAtomics = usingAtomics: Boolean, + usingDataScratchpad = usingDataScratchpad: Boolean, + usingVM = usingVM: Boolean, + usingCompressed = 
usingCompressed: Boolean, + usingBTB = usingBTB: Boolean, + itlbNSets = itlbNSets: Int, + itlbNWays = itlbNWays: Int, + itlbNSectors = itlbNSectors: Int, + itlbNSuperpageEntries = itlbNSuperpageEntries: Int, + blockBytes = cacheBlockBytes: Int, + iCacheNSets = iCacheNSets: Int, + iCacheNWays = iCacheNWays: Int, + iCachePrefetch = iCachePrefetch: Boolean, + btbEntries = btbEntries: Int, + btbNMatchBits = btbNMatchBits: Int, + btbUpdatesOutOfOrder = btbUpdatesOutOfOrder: Boolean, + nPages = nPages: Int, + nRAS = nRAS: Int, + nPMPs = nPMPs: Int, + paddrBits = paddrBits: Int, + pgLevels = pgLevels: Int, + asidBits = asidBits: Int, + bhtParameter = bhtParameter: Option[BHTParameter], + legal = legal: BitSet, + cacheable = cacheable: BitSet, + read = read: BitSet, + write = write: BitSet, + putPartial = putPartial: BitSet, + logic = logic: BitSet, + arithmetic = arithmetic: BitSet, + exec = exec: BitSet, + sideEffects = sideEffects: BitSet + ) + + def fpuParameter: Option[FPUParameter] = fLen.zip(minFLen).map { + case (fLen, minFLen) => + FPUParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + hartIdLen: Int + ) + } + + val vfuInstantiateParameter = if (instructionSets.contains("Zve32f")) + VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq(), + divfpModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = Seq() + ) else + VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneDiv], LaneDivParam(32, 1)), Seq(0, 1, 2, 3)) 
+ ), + divfpModuleParameters = Seq(), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() + ) + + def t1Parameter: T1Parameter = T1Parameter( + vLen = vLen, + dLen = dLen, + extensions = instructionSets.filter(Seq("Zve32x", "Zve32f").contains), + // empty for now. + t1customInstructions = Seq(), + vrfBankSize = vrfBankSize, + vrfRamType = vrfRamType, + vfuInstantiateParameter = vfuInstantiateParameter + ) + + def instructionFetchParameter: AXI4BundleParameter = frontendParameter.instructionFetchParameter + + def itimParameter: Option[AXI4BundleParameter] = frontendParameter.itimParameter + + def loadStoreParameter: AXI4BundleParameter = hellaCacheParameter.loadStoreParameter + + def dtimParameter: Option[AXI4BundleParameter] = hellaCacheParameter.dtimParameter + + def t1HighBandwidthParameter: AXI4BundleParameter = t1Parameter.axi4BundleParameter + + def t1HighOutstandingParameter: AXI4BundleParameter = t1Parameter.axi4BundleParameter.copy(dataWidth = 32) +} + +class T1RocketProbe(parameter: T1RocketTileParameter) extends Bundle { + val rocketProbe: RocketProbe = Output(new RocketProbe(parameter.rocketParameter)) + val t1Probe: T1Probe = Output(new T1Probe(parameter.t1Parameter)) +} + +class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + // todo: Const + val hartid = Flipped(UInt(parameter.hartIdLen.W)) + val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) + + val debug: Bool = Input(Bool()) + val mtip: Bool = Input(Bool()) + val msip: Bool = Input(Bool()) + val meip: Bool = Input(Bool()) + val seip: Option[Bool] = Option.when(parameter.usingSupervisor)(Bool()) + val lip: Vec[Bool] = Vec(parameter.nLocalInterrupts, Bool()) + val nmi = Option.when(parameter.usingNMI)(Bool()) + val nmiInterruptVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + val nmiExceptionVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + // TODO: buserror should be handled by NMI + val buserror: Bool = Input(Bool()) + val wfi: Bool = Output(Bool()) + val halt: Bool = Output(Bool()) + + val instructionFetchAXI: AXI4ROIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + val itimAXI: Option[AXI4RWIrrevocable] = + parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val loadStoreAXI: AXI4RWIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) + val dtimAXI: Option[AXI4RWIrrevocable] = + parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val highBandwidthAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HighBandwidthParameter) + val highOutstandingAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HighOutstandingParameter) + + // TODO: merge it.
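Review note: the interface above leans on `Option`-gated ports (`seip`, the `nmi*` trio, `itimAXI`, `dtimAXI`): a port only exists when the corresponding feature is configured, and the wiring later uses `zip`/`foreach` so disabled configurations elaborate without null checks. Minimal sketch of the idiom, assuming a hypothetical module with a single Bool port pair:

```scala
import chisel3._

class MaybeNmi(useNmi: Boolean) extends Module {
  // Ports exist only when useNmi is true.
  val nmiIn  = Option.when(useNmi)(IO(Input(Bool())))
  val nmiOut = Option.when(useNmi)(IO(Output(Bool())))
  // Option.zip yields Some only when both sides exist; foreach wires them.
  nmiIn.zip(nmiOut).foreach { case (i, o) => o := i }
}
```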
+ val t1RocketProbe: T1RocketProbe = Output(Probe(new T1RocketProbe(parameter))) +} + +class T1RocketTile(val parameter: T1RocketTileParameter) + extends FixedIORawModule(new T1RocketTileInterface(parameter)) + with SerializableModule[T1RocketTileParameter] { + val rocket: Instance[Rocket] = Instantiate(new Rocket(parameter.rocketParameter)) + val frontend: Instance[Frontend] = Instantiate(new Frontend(parameter.frontendParameter)) + val hellaCache: Instance[HellaCache] = Instantiate(new HellaCache(parameter.hellaCacheParameter)) + val hellaCacheArbiter: Instance[HellaCacheArbiter] = Instantiate( + new HellaCacheArbiter(parameter.hellaCacheArbiterParameter) + ) + val ptw: Instance[PTW] = Instantiate(new PTW(parameter.ptwParameter)) + val fpu: Option[Instance[FPU]] = parameter.fpuParameter.map(fpuParameter => Instantiate(new FPU(fpuParameter))) + val t1: Instance[T1] = Instantiate(new T1(parameter.t1Parameter)) + + rocket.io.clock := io.clock + rocket.io.reset := io.reset + rocket.io.hartid := io.hartid + rocket.io.interrupts.debug := io.debug + rocket.io.interrupts.mtip := io.mtip + rocket.io.interrupts.msip := io.msip + rocket.io.interrupts.meip := io.meip + rocket.io.interrupts.seip.foreach(_ := io.seip.get) + rocket.io.interrupts.lip := io.lip + rocket.io.interrupts.nmi.foreach { nmi => + nmi.rnmi := io.nmi.get + nmi.rnmi_interrupt_vector := io.nmiInterruptVector.get + nmi.rnmi_exception_vector := io.nmiExceptionVector.get + } + // @todo make it optional + rocket.io.buserror := io.buserror + io.wfi := rocket.io.wfi + io.loadStoreAXI <> hellaCache.io.loadStoreAXI + io.dtimAXI.zip(hellaCache.io.dtimAXI).foreach { case (io, hellaCache) => io <> hellaCache } + io.instructionFetchAXI <> frontend.io.instructionFetchAXI + io.itimAXI.zip(frontend.io.itimAXI).foreach { case (io, frontend) => io <> frontend } + // designed for halt and beu; only the halt function is used for now. + io.halt := Seq(frontend.io.nonDiplomatic.errors.uncorrectable, hellaCache.io.errors.uncorrectable) + .flatMap(_.map(_.valid)) + .foldLeft(false.B)(_ || _) + + // rocket core io + rocket.io.imem <> frontend.io.nonDiplomatic.cpu + hellaCacheArbiter.io.requestor(0) <> rocket.io.dmem + rocket.io.ptw <> ptw.io.dpath + rocket.io.fpu.zip(fpu.map(_.io.core)).foreach { case (core, fpu) => core <> fpu } + // match connect + t1.io.issue <> rocket.io.t1.get.issue + rocket.io.t1.get.retire <> t1.io.retire + // used by trace module + rocket.io.bpwatch := DontCare + // not used for now; this is designed to report the custom cease status. + // rocket.io.cease + // it will be used in the future w/ trace support. + rocket.io.traceStall := false.B + + // frontend io + frontend.io.clock := io.clock + frontend.io.reset := io.reset + frontend.io.resetVector := io.resetVector + ptw.io.requestor(0) <> frontend.io.nonDiplomatic.ptw + + // hellacache io + hellaCache.io.clock := io.clock + hellaCache.io.reset := io.reset + ptw.io.requestor(1) <> hellaCache.io.ptw + hellaCache.io.cpu <> hellaCacheArbiter.io.mem + + // ptw io + ptw.io.clock := io.clock + ptw.io.reset := io.reset + hellaCacheArbiter.io.requestor(1) <> ptw.io.mem + + // hellacache arbiter io + hellaCacheArbiter.io.clock := io.clock + hellaCacheArbiter.io.reset := io.reset + + fpu.foreach { fpu => + fpu.io.clock := io.clock + fpu.io.reset := io.reset + // @todo: remove it from FPU.
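Review note on the parameter class driving the wiring above: `T1RocketTileParameter` derives `xLen` from which base ISA is present and parses `vLen` out of the `zvl<N>b` extension name with a Scala 2.13 interpolated-string pattern. A plain-Scala sketch of both derivations (set contents and error messages are hypothetical):

```scala
object DeriveParams {
  def xLen(sets: Set[String]): Int =
    (sets("rv32_i"), sets("rv64_i")) match {
      case (true, false) => 32
      case (false, true) => 64
      case _             => throw new Exception("need exactly one base ISA")
    }

  // s"..." in pattern position binds ${n} to the matched substring.
  def vLen(sets: Seq[String]): Int =
    sets.collectFirst { case s"zvl${n}b" => n.toInt }
      .getOrElse(throw new Exception("no zvl<N>b extension given"))
}

// DeriveParams.xLen(Set("rv32_i", "rv_v")) == 32
// DeriveParams.vLen(Seq("rv_v", "zvl1024b")) == 1024
```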
+ fpu.io.cp_req <> DontCare + fpu.io.cp_resp <> DontCare + } + t1.io.clock := io.clock + t1.io.reset := io.reset + io.highBandwidthAXI <> t1.io.highBandwidthLoadStorePort + io.highOutstandingAXI <> t1.io.indexedLoadStorePort + + // probe + val probeWire = Wire(new T1RocketProbe(parameter)) + define(io.t1RocketProbe, ProbeValue(probeWire)) + probeWire.rocketProbe := probe.read(rocket.io.rocketProbe) + probeWire.t1Probe := probe.read(t1.io.t1Probe) +} diff --git a/t1rocketemu/.clang-format b/t1rocketemu/.clang-format new file mode 100644 index 000000000..57d55c245 --- /dev/null +++ b/t1rocketemu/.clang-format @@ -0,0 +1,236 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAfterAttributes: Never +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Attach +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: 
'^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +... 
+ diff --git a/t1rocketemu/.gitignore b/t1rocketemu/.gitignore new file mode 100644 index 000000000..9f970225a --- /dev/null +++ b/t1rocketemu/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/t1rocketemu/.rustfmt.toml b/t1rocketemu/.rustfmt.toml new file mode 100644 index 000000000..7b6c82e24 --- /dev/null +++ b/t1rocketemu/.rustfmt.toml @@ -0,0 +1,4 @@ +hard_tabs = false +tab_spaces = 2 +chain_width = 100 +struct_lit_width = 50 \ No newline at end of file diff --git a/t1rocketemu/Cargo.lock b/t1rocketemu/Cargo.lock new file mode 100644 index 000000000..4eea84c63 --- /dev/null +++ b/t1rocketemu/Cargo.lock @@ -0,0 +1,666 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "cc" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b3edb18336f4df585bc9aa31dd99c036dfa5dc5e9a2939a722a188f3a8970d" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c09dd5ada6c6c78075d6fd0da3f90d8080651e2d6cc8eb2f1aaa4034ced708" +dependencies = [ + "anstream", + 
"anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "spike_rs", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libloading" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" 
+dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "offline" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "common", + "libloading", + "num-bigint", + "serde", + "serde_json", + "spike_rs", + "tracing", + "tracing-subscriber", + "xmas-elf", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "online_dpi" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "common", + "elf", + "hex", + "spike_rs", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "online_drive" +version = "0.1.0" +dependencies = [ + "cmake", + "online_dpi", +] + +[[package]] +name = "online_vcs" +version = "0.1.0" +dependencies = [ + "online_dpi", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spike_rs" +version = "0.1.0" +dependencies = [ + "anyhow", + "libc", + "tracing", + "xmas-elf", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "xmas-elf" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42c49817e78342f7f30a181573d82ff55b88a35f86ccaf07fc64b3008f56d1c6" +dependencies = [ + "zero", +] + +[[package]] +name = "zero" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fe21bcc34ca7fe6dd56cc2cb1261ea59d6b93620215aefb5ea6032265527784" diff --git a/t1rocketemu/Cargo.toml b/t1rocketemu/Cargo.toml new file mode 100644 index 000000000..b4488928c --- /dev/null +++ b/t1rocketemu/Cargo.toml @@ -0,0 +1,25 @@ +[workspace] +resolver = "2" +members = [ + "test_common", + "spike_rs", + "offline", + "online_dpi", + "online_drive", + "online_vcs", +] +exclude = [ + "spike_interfaces" +] + +[workspace.package] +version = "0.1.0" + +[workspace.dependencies] +anyhow = "1.0.79" +clap = { version = "4.4.18", features = ["derive"] } +tracing = "0.1.40" +tracing-subscriber = { version = "0.3", features = ["env-filter", "ansi"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +num-bigint = "0.4.6" diff --git a/t1rocketemu/configs/default.json b/t1rocketemu/configs/default.json new file mode 100644 index 000000000..0cf9f92dc --- /dev/null +++ b/t1rocketemu/configs/default.json @@ -0,0 +1,19 @@ +{ + "parameter": { + "instructionSets": ["rv32_i", "rv_f", "rv_a", "rv_v", "Zve32x", "zvl1024b", "rv_c"], + "cacheBlockBytes": 32, + "nPMPs": 8, + "cacheable": "b1???????????????????????????????", + "sideEffects": "b000?????????????????????????????", + "dcacheNSets": 64, + "dcacheNWays": 4, + "dcacheRowBits": 32, + "iCacheNSets": 32, + "iCacheNWays": 4, + "iCachePrefetch": false, + "dLen": 256, + "vrfBankSize": 2, + "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rp1w" + }, + "generator": "org.chipsalliance.t1.tile.T1RocketTile" +} diff --git a/t1rocketemu/default.nix b/t1rocketemu/default.nix new file mode 100644 index 000000000..bd63fc4cb --- /dev/null +++ b/t1rocketemu/default.nix @@ -0,0 +1,18 @@ +{ lib +, newScope +}: +lib.makeScope newScope (scope: { + mlirbc = scope.callPackage ./nix/mlirbc.nix { }; + rtl = scope.callPackage ./nix/rtl.nix { }; + verilated-c-lib = scope.callPackage ./nix/verilated-c-lib.nix { }; + emu = scope.callPackage ./emu.nix { }; + designConfig = with builtins; (fromJSON (readFile ./configs/default.json)).parameter; + cases = scope.callPackage ../tests { + configName = "t1rocket"; + t1rocket-emu = scope.emu; + rtlDesignMetadata = { + march = "rv32iafcv_zve32x_zvl1024b"; + dlen = scope.designConfig.dLen; + }; + }; +}) diff --git a/t1rocketemu/emu.nix b/t1rocketemu/emu.nix new file mode 100644 index 000000000..50f14e2c9 --- /dev/null +++ b/t1rocketemu/emu.nix @@ -0,0 +1,61 @@ +{ lib +, rustPlatform +, zlib +, libspike +, libspike_interfaces +, cmake +, verilator +, verilated-c-lib +}: +rustPlatform.buildRustPackage { + name = "t1rocketemu"; + + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./test_common + ./spike_rs + ./offline + ./online_dpi + ./online_drive + ./online_vcs 
+ ./Cargo.lock + ./Cargo.toml + ]; + }; + + buildInputs = [ + zlib + libspike_interfaces + verilated-c-lib + ]; + + nativeBuildInputs = [ + verilator + cmake + ]; + + # FIXME: can we hack this into derivations, so that we don't need to specify library dir explicitly? + env = + let + toLib = drv: "${drv}/lib"; + in + { + SPIKE_LIB_DIR = toLib libspike; + SPIKE_INTERFACES_LIB_DIR = toLib libspike_interfaces; + VERILATED_INC_DIR = "${verilated-c-lib}/include"; + VERILATED_LIB_DIR = "${verilated-c-lib}/lib"; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + outputs = [ "out" "driver" "offline" ]; + + postInstall = '' + mkdir -p $driver/bin $offline/bin + ln -s $out/bin/driver $driver/bin/driver + ln -s $out/bin/offline $offline/bin/offline + ''; +} diff --git a/t1rocketemu/nix/mlirbc.nix b/t1rocketemu/nix/mlirbc.nix new file mode 100644 index 000000000..14573c9b8 --- /dev/null +++ b/t1rocketemu/nix/mlirbc.nix @@ -0,0 +1,22 @@ +{ stdenvNoCC + +, espresso +, circt + +, elaborator +}: +stdenvNoCC.mkDerivation { + name = "t1rocketemu-elaborated.mlirbc"; + + nativeBuildInputs = [ elaborator espresso circt ]; + + buildCommand = '' + mkdir elaborate + elaborator t1rocketemu --target-dir elaborate --t1rocket-config ${../configs/default.json} + firtool elaborate/*.fir \ + --annotation-file elaborate/*.anno.json \ + --emit-bytecode \ + --parse-only \ + -o $out + ''; +} diff --git a/t1rocketemu/nix/rtl.nix b/t1rocketemu/nix/rtl.nix new file mode 100644 index 000000000..93b41a309 --- /dev/null +++ b/t1rocketemu/nix/rtl.nix @@ -0,0 +1,26 @@ +{ stdenvNoCC +, lib + +, circt +, mlirbc +}: + +let + mfcArgs = lib.escapeShellArgs [ + "-O=debug" + "--split-verilog" + "--preserve-values=named" + "--lowering-options=verifLabels,omitVersionComment" + "--strip-debug-info" + ]; +in +stdenvNoCC.mkDerivation { + name = "t1rocket-rtl"; + nativeBuildInputs = [ circt ]; + + buildCommand = '' + mkdir -p $out + + firtool ${mlirbc} ${mfcArgs} -o $out + ''; +}
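The emu.nix derivation above hands library locations to the Cargo build purely through environment variables (SPIKE_LIB_DIR, SPIKE_INTERFACES_LIB_DIR, VERILATED_INC_DIR, VERILATED_LIB_DIR), which is what its FIXME comment is about. The sketch below is a hypothetical build.rs showing how such variables are commonly consumed on the Rust side; the variable names match the Nix expression, but the link-search wiring is an illustration, not code from this patch:

```rust
// Hypothetical build.rs sketch: consume the *_LIB_DIR variables that
// emu.nix sets. Which libraries are actually linked is up to each crate;
// this only demonstrates the env-var -> linker-search-path plumbing.
use std::env;

fn main() {
    for var in ["SPIKE_LIB_DIR", "SPIKE_INTERFACES_LIB_DIR", "VERILATED_LIB_DIR"] {
        // Rebuild when the Nix-provided path changes.
        println!("cargo:rerun-if-env-changed={var}");
        if let Ok(dir) = env::var(var) {
            // Make the directory visible to the linker.
            println!("cargo:rustc-link-search=native={dir}");
        }
    }
}
```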
diff --git a/t1rocketemu/nix/verilated-c-lib.nix b/t1rocketemu/nix/verilated-c-lib.nix new file mode 100644 index 000000000..147747ad4 --- /dev/null +++ b/t1rocketemu/nix/verilated-c-lib.nix @@ -0,0 +1,78 @@ +{ lib +, fetchgit +, stdenv +, rtl +, verilator +, enable-trace ? true +, zlib +}: + +let + rocket-chip-v-src = fetchgit { + url = "https://github.com/chipsalliance/rocket-chip.git"; + rev = "833385404d9c722bdfad3e453c19a3ac6f40dbf0"; + fetchSubmodules = false; + sparseCheckout = [ + "src/main/resources/vsrc" + ]; + hash = "sha256-CUq9VDwb7ZtclosgOWfDZMOpH+U/yBjL5CNiXZRiB80="; + }; +in +stdenv.mkDerivation { + name = "t1rocket-verilated"; + + src = rtl; + + nativeBuildInputs = [ verilator ]; + + propagatedBuildInputs = lib.optionals enable-trace [ zlib ]; + + env.rocketChipVSrc = "${rocket-chip-v-src}/src/main/resources/vsrc/"; + + buildPhase = '' + runHook preBuild + + echo "[nix] running verilator" + # FIXME: fix all the warnings and remove the -Wno- flags here + verilator \ + -I"$rocketChipVSrc" \ + ${lib.optionalString enable-trace "--trace-fst"} \ + --timing \ + --threads 8 \ + --threads-max-mtasks 8000 \ + -O1 \ + -Wno-WIDTHEXPAND \ + -Wno-LATCH \ + --cc TestBench + + echo "[nix] building verilated C lib" + + # backup srcs + mkdir -p $out/share + cp -r obj_dir $out/share/verilated_src + + rm $out/share/verilated_src/*.dat + + # We can't use -C here because VTestBench.mk is generated with relative path + cd obj_dir + make -j "$NIX_BUILD_CORES" -f VTestBench.mk libVTestBench + + runHook postBuild + ''; + + hardeningDisable = [ "fortify" ]; + + passthru = { + inherit enable-trace rocket-chip-v-src; + }; + + installPhase = '' + runHook preInstall + + mkdir -p $out/include $out/lib + cp *.h $out/include + cp *.a $out/lib + + runHook postInstall + ''; +} diff --git a/t1rocketemu/offline/Cargo.toml b/t1rocketemu/offline/Cargo.toml new file mode 100644 index 000000000..2824a161e --- /dev/null +++ b/t1rocketemu/offline/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "offline" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +num-bigint = { workspace = true } + +libloading = "0.8.1" +xmas-elf = "0.9.1" + +common = { path = "../test_common" } +spike_rs = { path = "../spike_rs" } diff --git a/t1rocketemu/offline/src/difftest.rs b/t1rocketemu/offline/src/difftest.rs new file mode 100644 index 000000000..5e59ca60f --- /dev/null +++ b/t1rocketemu/offline/src/difftest.rs @@ -0,0 +1,94 @@ +use common::spike_runner::SpikeRunner; +use std::path::Path; +use tracing::info; + +use common::rtl_config::RTLConfig; +use common::CommonArgs; + +use crate::dut::Dut; +use crate::json_events::*; + +pub struct Difftest { + runner: SpikeRunner, + dut: Dut, + + #[allow(dead_code)] + config: RTLConfig, +} + +impl Difftest { + pub fn new(args: CommonArgs) -> Self { + let config = RTLConfig { vlen: args.vlen, dlen: args.dlen }; + Self { + runner: SpikeRunner::new(&args, true), + dut: Dut::new(Path::new( + &args.log_file.expect("difftest must be run with a log file"), + )), + config, + } + } + + pub fn diff(&mut self) -> anyhow::Result<()> { + self.runner.check_and_clear_fence(); + + let event = self.dut.step()?; + + match event { + JsonEvents::SimulationStart { cycle } => { + self.runner.cycle = *cycle; + Ok(()) + } + JsonEvents::SimulationStop { reason, cycle } => { + info!("simulation stopped at cycle {}, reason {}", cycle, reason); + self.runner.cycle = *cycle; + Ok(()) + } + JsonEvents::RegWrite { idx, data, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_reg_write(&RegWriteEvent { idx: *idx, data: *data, cycle: *cycle }) + } + JsonEvents::Issue { idx, cycle } => {
self.runner.cycle = *cycle; + self.runner.peek_issue(&IssueEvent { idx: *idx, cycle: *cycle }) + } + JsonEvents::MemoryWrite { mask, data, lsu_idx, address, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_memory_write(&MemoryWriteEvent { + mask: mask.clone(), + data: data.clone(), + lsu_idx: *lsu_idx, + address: *address, + cycle: *cycle, + }) + } + JsonEvents::LsuEnq { enq, cycle } => { + self.runner.cycle = *cycle; + self.runner.update_lsu_idx(&LsuEnqEvent { enq: *enq, cycle: *cycle }) + } + JsonEvents::VrfWrite { issue_idx, vd, offset, mask, data, lane, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_vrf_write(&VrfWriteEvent { + issue_idx: *issue_idx, + vd: *vd, + offset: *offset, + mask: mask.clone(), + data: data.clone(), + lane: *lane, + cycle: *cycle, + }) + } + JsonEvents::CheckRd { data, issue_idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.check_rd(&CheckRdEvent { data: *data, issue_idx: *issue_idx, cycle: *cycle }) + } + JsonEvents::VrfScoreboard { count, issue_idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.vrf_scoreboard(&VrfScoreboardEvent { + count: *count, + issue_idx: *issue_idx, + cycle: *cycle, + }) + } + } + } +} diff --git a/t1rocketemu/offline/src/dut.rs b/t1rocketemu/offline/src/dut.rs new file mode 100644 index 000000000..a4cc80821 --- /dev/null +++ b/t1rocketemu/offline/src/dut.rs @@ -0,0 +1,48 @@ +use anyhow::Context; +use std::io::BufRead; +use std::path::Path; + +use crate::json_events::JsonEvents; + +#[derive(Debug)] +pub struct Dut { + events: Vec<JsonEvents>, + idx: u32, +} + +impl Dut { + fn read_json(path: &Path) -> anyhow::Result<Vec<JsonEvents>> { + let file = std::fs::File::open(path).unwrap(); + let reader = std::io::BufReader::new(file); + + let mut events = Vec::new(); + + for (i, line) in reader.lines().enumerate() { + let line = line.expect("line read error"); + if line.starts_with("{") { + // ignore illegal lines + let event: JsonEvents = serde_json::from_str(&line) + .with_context(|| format!("parsing {} line {}", path.display(), i + 1))?; + events.push(event); + } + } + + Ok(events) + } + + pub fn new(path: &Path) -> Self { + let events = Self::read_json(path).unwrap(); + let idx = 0; + Self { events, idx } + } + + pub fn step(&mut self) -> anyhow::Result<&JsonEvents> { + let event = match self.events.get(self.idx as usize) { + Some(event) => event, + None => return Err(anyhow::anyhow!("no more events")), + }; + self.idx += 1; + + Ok(event) + } +}
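Dut::read_json consumes one JSON object per line of the RTL log, dispatched on the "event" tag by the serde(tag = "event") enum defined in json_events.rs below. A self-contained sketch of decoding one such line (the field set is a subset of the real JsonEvents enum, and the sample line is made up, not captured from a run); it requires serde with the derive feature plus serde_json:

```rust
// Minimal, standalone sketch of the tagged-event decoding used by
// Dut::read_json. The "event" field selects the enum variant.
use serde::Deserialize;

#[derive(Deserialize, Debug)]
#[serde(tag = "event")]
enum Event {
    SimulationStart { cycle: u64 },
    Issue { idx: u8, cycle: u64 },
}

fn main() {
    let line = r#"{"event":"Issue","idx":3,"cycle":42}"#;
    let event: Event = serde_json::from_str(line).unwrap();
    println!("{event:?}"); // Issue { idx: 3, cycle: 42 }
}
```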
diff --git a/t1rocketemu/offline/src/json_events.rs b/t1rocketemu/offline/src/json_events.rs new file mode 100644 index 000000000..585c5372a --- /dev/null +++ b/t1rocketemu/offline/src/json_events.rs @@ -0,0 +1,431 @@ +use common::spike_runner::SpikeRunner; +use num_bigint::BigUint; +use serde::{Deserialize, Deserializer}; +use spike_rs::spike_event::LSU_IDX_DEFAULT; +use tracing::{debug, info}; + +fn str_to_vec_u8<'de, D>(deserializer: D) -> Result<Vec<u8>, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) + .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; + Ok(bigint.to_bytes_le()) +} + +fn str_to_vec_bool<'de, D>(deserializer: D) -> Result<Vec<bool>, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) + .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; + let bytes = bigint.to_bytes_le(); + let bools = bytes.iter().flat_map(|byte| (0..8).map(move |i| (byte >> i) & 1u8 == 1u8)).collect(); + + Ok(bools) +} + +fn str_to_u32<'de, D>(deserializer: D) -> Result<u32, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let value = + u32::from_str_radix(s.trim_start_matches(' '), 16).map_err(serde::de::Error::custom)?; + + Ok(value) +} + +fn mask_display(mask: &Vec<bool>) -> String { + mask.into_iter().map(|&b| if b { '1' } else { '0' }).collect() +}
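These helpers decode the RTL log's hex-string fields: str_to_vec_u8 parses the whole string as one big integer and emits little-endian bytes, and str_to_vec_bool further explodes each byte into eight LSB-first mask bits. A standalone check of that byte-order convention, using the same num-bigint API as the deserializers:

```rust
// Standalone illustration of the str_to_vec_u8 convention above:
// hex string -> BigUint -> little-endian byte vector.
use num_bigint::BigUint;

fn main() {
    let bytes = BigUint::parse_bytes(b"0a0b0c0d", 16).unwrap().to_bytes_le();
    // Least significant byte comes first.
    assert_eq!(bytes, vec![0x0d, 0x0c, 0x0b, 0x0a]);
    println!("{bytes:02x?}");
}
```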
+ +#[derive(Deserialize, Debug)] +#[serde(tag = "event")] +pub(crate) enum JsonEvents { + SimulationStart { + cycle: u64, + }, + SimulationStop { + reason: u8, + cycle: u64, + }, + RegWrite { + idx: u8, + #[serde(deserialize_with = "str_to_u32", default)] + data: u32, + cycle: u64, + }, + Issue { + idx: u8, + cycle: u64, + }, + LsuEnq { + enq: u32, + cycle: u64, + }, + VrfWrite { + issue_idx: u8, + vd: u32, + offset: u32, + #[serde(deserialize_with = "str_to_vec_bool", default)] + mask: Vec<bool>, + #[serde(deserialize_with = "str_to_vec_u8", default)] + data: Vec<u8>, + lane: u32, + cycle: u64, + }, + MemoryWrite { + #[serde(deserialize_with = "str_to_vec_bool", default)] + mask: Vec<bool>, + #[serde(deserialize_with = "str_to_vec_u8", default)] + data: Vec<u8>, + lsu_idx: u8, + #[serde(deserialize_with = "str_to_u32", default)] + address: u32, + cycle: u64, + }, + CheckRd { + #[serde(deserialize_with = "str_to_u32", default)] + data: u32, + issue_idx: u8, + cycle: u64, + }, + VrfScoreboard { + count: u32, + issue_idx: u8, + cycle: u64, + }, +} + +pub struct RegWriteEvent { + pub idx: u8, + pub data: u32, + pub cycle: u64, +} + +pub struct IssueEvent { + pub idx: u8, + pub cycle: u64, +} + +pub struct LsuEnqEvent { + pub enq: u32, + pub cycle: u64, +} + +pub struct VrfWriteEvent { + pub lane: u32, + pub vd: u32, + pub offset: u32, + pub mask: Vec<bool>, + pub data: Vec<u8>, + pub issue_idx: u8, + pub cycle: u64, +} + +pub struct MemoryWriteEvent { + pub mask: Vec<bool>, + pub data: Vec<u8>, + pub lsu_idx: u8, + pub address: u32, + pub cycle: u64, +} + +pub struct VrfScoreboardEvent { + pub count: u32, + pub issue_idx: u8, + pub cycle: u64, +} + +pub struct CheckRdEvent { + pub data: u32, + pub issue_idx: u8, + pub cycle: u64, +} + +pub(crate) trait JsonEventRunner { + fn peek_reg_write(&mut self, reg_write: &RegWriteEvent) -> anyhow::Result<()>; + + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()>; + + fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()>; + + fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()>; + + fn vrf_scoreboard(&mut self, vrf_scoreboard: &VrfScoreboardEvent) -> anyhow::Result<()>; + + fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()>; + + fn check_and_clear_fence(&mut self); + + fn check_rd(&mut self, check_rd: &CheckRdEvent) -> anyhow::Result<()>; + + fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()>; +} + +impl JsonEventRunner for SpikeRunner { + fn peek_reg_write(&mut self, reg_write: &RegWriteEvent) -> anyhow::Result<()> { + let cycle = reg_write.cycle; + let idx = reg_write.idx; + let data = reg_write.data; + + let se = self.find_reg_write(); + + info!( + "[{cycle}] RegWrite: inst ({}) check reg write idx={idx}, data={data:08x}", + se.describe_insn() + ); + + assert_eq!(idx as u32, se.rd_idx, "idx should be equal to se.rd_idx"); + assert_eq!(data, se.rd_bits, "data should be equal to se.rd_bits"); + Ok(()) + } + + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()> { + self.find_v_se_to_issue(); // ensure the front of queue is a new un-issued se + let se = self.commit_queue.front_mut().unwrap(); + if se.is_vfence() { + return Ok(()); + } + + se.issue_idx = issue.idx as u8; + + info!( + "[{}] Issue: issue_idx={}, pc={:#x}, inst={}", + issue.cycle, issue.idx, se.pc, se.disasm + ); + + Ok(()) + } + + fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()> { + let enq = lsu_enq.enq; + assert!(enq > 0, "enq should be greater than 0"); + let cycle = lsu_enq.cycle; + + if let Some(se) = self + .commit_queue + .iter_mut() + .rev() + .find(|se| (se.is_vload() || se.is_vstore()) && se.lsu_idx == LSU_IDX_DEFAULT) + { + let index = enq.trailing_zeros() as u8; + se.lsu_idx = index; + info!( + "[{cycle}] UpdateLSUIdx: instr ({}) is allocated with lsu_idx: {index}", + se.describe_insn() + ); + } + Ok(()) + } + + fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()> { + let cycle = vrf_write.cycle; + let vlen_in_bytes = self.vlen / 8; + let lane_number = self.dlen / 32; + let record_idx_base = (vrf_write.vd * vlen_in_bytes + + (vrf_write.lane + lane_number * vrf_write.offset) * 4) as usize; + + let mut retire_issue: Option<u8> = None; + + if let Some(se) = + self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == vrf_write.issue_idx) + { + debug!( + "[{}] VrfWrite: lane={}, vd={}, idx_base={}, issue_idx={}, offset={}, mask={}, data={:x?} ({})", + vrf_write.cycle, + vrf_write.lane, + vrf_write.vd, + record_idx_base, + vrf_write.issue_idx, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data, + se.describe_insn() + ); + + if let Some(unretired_writes) = se.vrf_access_record.unretired_writes { + assert!( + unretired_writes > 0, + "[{}] unretired_writes should be greater than 0, issue_idx={} ({})", + vrf_write.cycle, + vrf_write.issue_idx, + se.describe_insn() + ); + if unretired_writes == 1 { + retire_issue = Some(vrf_write.issue_idx); + } + se.vrf_access_record.unretired_writes = Some(unretired_writes - 1); + } else { + se.vrf_access_record.retired_writes += 1; + } + + vrf_write.mask.iter().enumerate().filter(|(_, &mask)| mask).for_each(|(offset, _)| { + let written_byte = *vrf_write.data.get(offset).unwrap_or(&0); + + if let Some(record) = se.vrf_access_record.all_writes.get_mut(&(record_idx_base + offset)) { + assert_eq!( + record.byte, + written_byte, + "[{}] {offset}th byte incorrect ({:02x} record != {written_byte:02x} written) \ + for vrf write (lane={}, vd={}, offset={}, mask={}, data={:x?}) \ + issue_idx={} [vrf_idx={}] (disasm: {}, pc: {:#x}, bits: {:#x})", + vrf_write.cycle, + record.byte, + vrf_write.lane, + vrf_write.vd, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data, + se.issue_idx, + record_idx_base + offset, + se.disasm, + se.pc, + se.inst_bits + ); + record.executed = true; + } else { + debug!( + "[{}] cannot find vrf write record, maybe not changed (lane={}, vd={}, idx={}, offset={}, mask={}, data={:x?})", + vrf_write.cycle, + vrf_write.lane, + vrf_write.vd, + record_idx_base + offset, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data + ); + } + }) + } else { + info!( + "[{cycle}] RecordRFAccess: rtl detect vrf write on lane={}, vd={} \ + with no matched se (issue_idx={}), \ + maybe from committed load insn", + vrf_write.lane, vrf_write.vd, vrf_write.issue_idx + ); + } + + if let Some(issue_idx) = retire_issue { + self.retire(cycle, issue_idx).unwrap(); + } + + Ok(()) + }
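peek_vrf_write flattens (vd, lane, offset) into a byte index over the shadow VRF image: each vector register occupies vlen/8 bytes, and each write beat contributes 4 bytes per 32-bit lane. A standalone rendering of that index arithmetic, with illustrative configuration values (vlen=1024, dlen=256 are assumptions for the example, not constants from this diff):

```rust
// Standalone sketch of the record_idx_base computation in peek_vrf_write.
fn record_idx_base(vd: u32, lane: u32, offset: u32, vlen: u32, dlen: u32) -> usize {
    let vlen_in_bytes = vlen / 8; // bytes occupied by one vector register
    let lane_number = dlen / 32;  // number of 32-bit lanes per beat
    (vd * vlen_in_bytes + (lane + lane_number * offset) * 4) as usize
}

fn main() {
    // vd=2, lane=3, offset=1: base byte index into the flattened VRF image.
    let base = record_idx_base(2, 3, 1, 1024, 256);
    assert_eq!(base, 2 * 128 + (3 + 8) * 4); // = 300
    println!("record_idx_base = {base}");
}
```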
+ + fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()> { + let data = memory_write.data.to_owned(); + let mask = memory_write.mask.to_owned(); + let cycle = memory_write.cycle; + let base_addr = memory_write.address; + let lsu_idx = memory_write.lsu_idx; + + if let Some(se) = self.commit_queue.iter_mut().find(|se| se.lsu_idx == lsu_idx) { + info!("[{cycle}] MemoryWrite: address={base_addr:08x}, size={}, data={data:x?}, mask={}, pc = {:#x}, disasm = {}", data.len(), mask_display(&mask), se.pc, se.disasm); + // compare with spike event record + mask.iter().enumerate() + .filter(|(_, &mask)| mask) + .for_each(|(offset, _)| { + let byte_addr = base_addr + offset as u32; + let data_byte = *data.get(offset).unwrap_or(&0); + let mem_write = + se.mem_access_record.all_writes.get_mut(&byte_addr).unwrap_or_else(|| { + panic!("[{cycle}] cannot find mem write of byte_addr {byte_addr:08x}") + }); + let single_mem_write_val = mem_write.writes[mem_write.num_completed_writes].val; + mem_write.num_completed_writes += 1; + assert_eq!(single_mem_write_val, data_byte, "[{cycle}] expect mem write of byte {single_mem_write_val:02X}, actual byte {data_byte:02X} (byte_addr={byte_addr:08X}, pc = {:#x}, disasm = {})", se.pc, se.disasm); + }); + return Ok(()); + } + + panic!("[{cycle}] cannot find se with instruction lsu_idx={lsu_idx}") + } + + fn vrf_scoreboard(&mut self, vrf_scoreboard: &VrfScoreboardEvent) -> anyhow::Result<()> { + let count = vrf_scoreboard.count; + let issue_idx = vrf_scoreboard.issue_idx; + let cycle = vrf_scoreboard.cycle; + + let mut should_retire: Option<u8> = None; + + if let Some(se) = self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == issue_idx) { + assert!( + se.vrf_access_record.retired_writes <= count, + "[{cycle}] retired_writes({}) should not exceed count({count}), issue_idx={issue_idx} ({})", + se.vrf_access_record.retired_writes, se.describe_insn() + ); + + // if instruction writes rd, it will retire in check_rd() + if count == se.vrf_access_record.retired_writes && !se.is_rd_written { + should_retire = Some(issue_idx); + } + // if all writes are committed, retire the se + se.vrf_access_record.unretired_writes = Some(count - se.vrf_access_record.retired_writes); + + info!( + "[{cycle}] VrfScoreboardReport: count={count}, issue_idx={issue_idx}, retired={} ({})", + se.vrf_access_record.retired_writes, + se.describe_insn() + ); + } else { + panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}"); + } + + if let Some(issue_idx) = should_retire { + self.retire(cycle, issue_idx).unwrap(); + } + + Ok(()) + } + + /// after update, if instructions before fence are cleared, fence is also cleared + fn check_and_clear_fence(&mut self) { + if !self.commit_queue.is_empty() { + let se = self.commit_queue.back().unwrap(); + + if se.is_vfence() && self.commit_queue.len() == 1 { + self.commit_queue.pop_back(); + } + } + } + + fn check_rd(&mut self, check_rd: &CheckRdEvent) -> anyhow::Result<()> { + let data = check_rd.data; + let cycle = check_rd.cycle; + let issue_idx = check_rd.issue_idx; + + let se = + self.commit_queue.iter_mut().find(|se| se.issue_idx == issue_idx).unwrap_or_else(|| { + panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}") + }); + + info!("[{cycle}] CheckRd: issue_idx={issue_idx}, data={data:x?}"); + + se.check_rd(data).expect("Failed to check_rd"); + + self.retire(cycle, issue_idx).unwrap(); + + Ok(()) + } + + fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()> { + if let Some(idx) = self.commit_queue.iter().position(|se| se.issue_idx == issue_idx)
{ + if let Some(se) = self.commit_queue.remove(idx) { + info!( + "[{cycle}] Retire: retire se with issue_idx={issue_idx}, ({})", + se.describe_insn() + ); + se.check_is_ready_for_commit(cycle).unwrap(); + } else { + panic!("[{cycle}] Retire: cannot remove se with instruction issue_idx={issue_idx}") + } + } else { + panic!("[{cycle}] Retire: cannot find se with instruction issue_idx={issue_idx}") + } + Ok(()) + } +} diff --git a/t1rocketemu/offline/src/main.rs b/t1rocketemu/offline/src/main.rs new file mode 100644 index 000000000..0328e2cf3 --- /dev/null +++ b/t1rocketemu/offline/src/main.rs @@ -0,0 +1,57 @@ +mod difftest; +mod dut; +mod json_events; + +use clap::Parser; +use tracing::info; + +use common::spike_runner::SpikeRunner; +use common::CommonArgs; + +use crate::difftest::Difftest; + +fn run_spike(args: &CommonArgs) -> anyhow::Result<()> { + let mut count: u64 = 0; + + let spike = SpikeRunner::new(args, true); + loop { + count += 1; + if count % 1000000 == 0 { + info!("count = {}", count); + } + match spike.exec() { + Ok(_) => {} + Err(_) => { + info!("total v instructions count = {}", count); + info!("Simulation quit gracefully"); + return Ok(()); + } + }; + } +} + +fn main() -> anyhow::Result<()> { + // parse args + let args = CommonArgs::parse(); + + args.setup_logger()?; + + // if there is no log file, just run spike and quit + if args.log_file.is_none() { + run_spike(&args)?; + return Ok(()); + } + + // if there is a log file, run difftest + let mut diff = Difftest::new(args); + + loop { + match diff.diff() { + Ok(_) => {} + Err(e) => { + info!("Simulation quit/error with {}", e); + return Ok(()); + } + } + } +} diff --git a/t1rocketemu/online_dpi/Cargo.toml b/t1rocketemu/online_dpi/Cargo.toml new file mode 100644 index 000000000..5d2cb5f5d --- /dev/null +++ b/t1rocketemu/online_dpi/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "online_dpi" +edition = "2021" +version.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +common = { path = "../test_common" } +spike_rs = { path = "../spike_rs" } +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } + +elf = "0.7.4" +hex = "0.4.3" + +[features] +sv2023 = [] +svvpi = [] +trace = []
diff --git a/t1rocketemu/online_dpi/src/dpi.rs b/t1rocketemu/online_dpi/src/dpi.rs new file mode 100644 index 000000000..5d2599942 --- /dev/null +++ b/t1rocketemu/online_dpi/src/dpi.rs @@ -0,0 +1,337 @@ +#![allow(non_snake_case)] +#![allow(unused_variables)] + +use clap::Parser; +use std::ffi::{c_char, c_longlong, CString}; +use std::sync::Mutex; +use tracing::debug; + +use crate::drive::Driver; +use crate::svdpi::SvScope; +use crate::OfflineArgs; + +pub type SvBitVecVal = u32; + +// -------------------------- +// preparing data structures +// -------------------------- + +static DPI_TARGET: Mutex<Option<Box<Driver>>> = Mutex::new(None); + +pub(crate) struct AxiReadPayload { + pub(crate) data: Vec<u8>, +} + +unsafe fn write_to_pointer(dst: *mut u8, data: &[u8]) { + let dst = std::slice::from_raw_parts_mut(dst, data.len()); + dst.copy_from_slice(data); +} + +unsafe fn fill_axi_read_payload(dst: *mut SvBitVecVal, dlen: u32, payload: &AxiReadPayload) { + let data_len = 256 * (dlen / 8) as usize; + assert!(payload.data.len() <= data_len); + write_to_pointer(dst as *mut u8, &payload.data); +} + +// Return (strobe in bit, data in byte) +unsafe fn load_from_payload( + payload: &*const SvBitVecVal, + data_width: usize, + dlen: usize, +) -> (Vec<bool>, &[u8]) { + let src = *payload as *mut u8; + let data_width_in_byte = dlen / 8; + let strb_width_in_byte = dlen / data_width; + let payload_size_in_byte = strb_width_in_byte + data_width_in_byte; // strobe bytes + data bytes + let byte_vec = std::slice::from_raw_parts(src, payload_size_in_byte); + let strobe = &byte_vec[0..strb_width_in_byte]; + let data = &byte_vec[strb_width_in_byte..]; + + let strb_width_in_bit = data_width / 8; + let masks: Vec<bool> = strobe + .into_iter() + .flat_map(|strb| { + let mask: Vec<bool> = (0..strb_width_in_bit).map(|i| (strb & (1 << i)) != 0).collect(); + mask + }) + .collect(); + assert!( + masks.len() == data.len(), + "strobe bit width is not aligned with data byte width" + ); + + debug!( + "load {payload_size_in_byte} byte from payload: raw_data={} strb={} data={}", + hex::encode(byte_vec), + hex::encode(strobe), + hex::encode(data), + ); + + (masks, data) +} + +//---------------------- +// dpi functions +//---------------------- + +/// evaluate after AW and W is finished at corresponding channel_id. +#[no_mangle] +unsafe extern "C" fn axi_write_highBandwidthAXI( + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + // struct packed {bit [255:0][DLEN:0] data; + // bit [255:0][DLEN/8:0] strb; } payload + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_highBandwidth (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize={awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let data_width = 32; // TODO: get from driver + let (strobe, data) = load_from_payload(&payload, data_width, driver.dlen as usize); + driver.axi_write_high_bandwidth(awaddr as u32, awsize as u64, &strobe, data); +} + +/// evaluate at AR fire at corresponding channel_id. +#[no_mangle] +unsafe extern "C" fn axi_read_highBandwidthAXI( + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + // struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_highBandwidth (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_high_bandwidth(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response); +} + +/// evaluate after AW and W is finished at corresponding channel_id.
+#[no_mangle] +unsafe extern "C" fn axi_write_highOutstandingAXI( + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + // struct packed {bit [255:0][31:0] data; bit [255:0][3:0] strb; } payload + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_high_outstanding (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize={awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let data_width = 32; // TODO: get from driver + let (strobe, data) = load_from_payload(&payload, data_width, 32); + driver.axi_write_high_outstanding(awaddr as u32, awsize as u64, &strobe, data); +} + +/// evaluate at AR fire at corresponding channel_id. +#[no_mangle] +unsafe extern "C" fn axi_read_highOutstandingAXI( + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + // struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_high_outstanding (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_high_outstanding(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response); +} + +#[no_mangle] +unsafe extern "C" fn axi_write_loadStoreAXI( + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_loadStore (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize={awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let data_width = 32; // TODO: get from sim + let (strobe, data) = load_from_payload(&payload, data_width, driver.dlen as usize); + driver.axi_write_load_store(awaddr as u32, awsize as u64, &strobe, data); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_loadStoreAXI( + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_loadStoreAXI (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_load_store(araddr as u32, arsize as 
u64); + fill_axi_read_payload(payload, driver.dlen, &response); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_instructionFetchAXI( + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_instructionFetchAXI (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_instruction_fetch(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response); +} + +#[no_mangle] +unsafe extern "C" fn t1rocket_cosim_init() { + let args = OfflineArgs::parse(); + args.common_args.setup_logger().unwrap(); + + let scope = SvScope::get_current().expect("failed to get scope in t1rocket_cosim_init"); + + let driver = Box::new(Driver::new(scope, &args)); + let mut dpi_target = DPI_TARGET.lock().unwrap(); + assert!( + dpi_target.is_none(), + "t1rocket_cosim_init should be called only once" + ); + *dpi_target = Some(driver); +} + +/// evaluated every 1024 cycles; returning reason = 0 continues the simulation, +/// any other value is treated as an error code. +#[no_mangle] +unsafe extern "C" fn cosim_watchdog(reason: *mut c_char) { + // the watchdog dpi call may arrive before initialization, so guard against a missing target + let mut driver = DPI_TARGET.lock().unwrap(); + if let Some(driver) = driver.as_mut() { + *reason = driver.watchdog() as c_char + } +} + +#[no_mangle] +unsafe extern "C" fn get_resetvector(resetvector: *mut c_longlong) { + let mut driver = DPI_TARGET.lock().unwrap(); + if let Some(driver) = driver.as_mut() { + *resetvector = driver.e_entry as c_longlong + } +} + +//-------------------------------- +// import functions and wrappers +//-------------------------------- + +mod dpi_export { + use std::ffi::c_char; + extern "C" { + #[cfg(feature = "trace")] + /// `export "DPI-C" function dump_wave(input string file)` + pub fn dump_wave(path: *const c_char); + + /// 'export "DPI-C" function quit()' + pub fn quit(); + } +} + +#[cfg(feature = "trace")] +pub(crate) fn dump_wave(scope: crate::svdpi::SvScope, path: &str) { + use crate::svdpi; + let path_cstring = CString::new(path).unwrap(); + + svdpi::set_scope(scope); + unsafe { + dpi_export::dump_wave(path_cstring.as_ptr()); + } +} + +pub(crate) fn quit() { + unsafe { + dpi_export::quit(); + } +} \ No newline at end of file
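load_from_payload above unpacks the packed SystemVerilog payload into a per-byte write mask: each strobe bit guards one data byte, least significant bit first. A self-contained sketch of just that expansion step, with illustrative widths:

```rust
// Standalone sketch of the strobe expansion done in load_from_payload:
// each strobe bit covers one data byte, LSB first.
fn expand_strobe(strobe: &[u8], bits_per_strobe_byte: usize) -> Vec<bool> {
    strobe
        .iter()
        .flat_map(|&s| (0..bits_per_strobe_byte).map(move |i| (s >> i) & 1 != 0))
        .collect()
}

fn main() {
    // 0b0101 -> bytes 0 and 2 enabled, bytes 1 and 3 masked off.
    assert_eq!(expand_strobe(&[0b0101], 4), vec![true, false, true, false]);
}
```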
diff --git a/t1rocketemu/online_dpi/src/drive.rs b/t1rocketemu/online_dpi/src/drive.rs new file mode 100644 index 000000000..6a4ff08f6 --- /dev/null +++ b/t1rocketemu/online_dpi/src/drive.rs @@ -0,0 +1,400 @@ +use crate::dpi::*; +use crate::{ get_t, EXIT_CODE, EXIT_POS }; +use crate::svdpi::SvScope; +use crate::OfflineArgs; + +use anyhow::Context; +use common::MEM_SIZE; +use elf::{ + abi::{EM_RISCV, ET_EXEC, PT_LOAD, STT_FUNC}, + endian::LittleEndian, + ElfStream, +}; +use std::collections::HashMap; +use std::os::unix::fs::FileExt; +use std::{fs, path::Path}; +use tracing::{debug, error, info, trace}; + +struct ShadowMem { + mem: Vec<u8>, +} + +impl ShadowMem { + pub fn new() -> Self { + Self { mem: vec![0; MEM_SIZE] } + } + + pub fn read_mem(&self, addr: u32, size: u32) -> &[u8] { + let start = addr as usize; + let end = (addr + size) as usize; + &self.mem[start..end] + } + + // size: 1 << arsize + // bus_size: AXI bus width in bytes + // return: Vec<u8> with len=bus_size + // if size < bus_size, the result is padded due to AXI narrow transfer rules + pub fn read_mem_axi(&self, addr: u32, size: u32, bus_size: u32) -> Vec<u8> { + assert!( + addr % size == 0 && bus_size % size == 0, + "unaligned access addr={addr:#x} size={size}B dlen={bus_size}B" + ); + + let data = self.read_mem(addr, size); + if size < bus_size { + // narrow + let mut data_padded = vec![0; bus_size as usize]; + let start = (addr % bus_size) as usize; + let end = start + data.len(); + data_padded[start..end].copy_from_slice(data); + + data_padded + } else { + // normal + data.to_vec() + } + } + + // size: 1 << awsize + // bus_size: AXI bus width in bytes + // masks: write strobes, len=bus_size + // data: write data, len=bus_size + pub fn write_mem_axi( + &mut self, + addr: u32, + size: u32, + bus_size: u32, + masks: &[bool], + data: &[u8], + ) { + assert!( + addr % size == 0 && bus_size % size == 0, + "unaligned write access addr={addr:#x} size={size}B dlen={bus_size}B" + ); + + // handle strb=0 AXI payload + if !masks.iter().any(|&x| x) { + trace!("Mask 0 write detect"); + return; + } + + // TODO: we do not check strobe is compatible with (addr, awsize) + let addr_align = addr & ((!bus_size) + 1); + + let bus_size = bus_size as usize; + assert_eq!(bus_size, masks.len()); + assert_eq!(bus_size, data.len()); + + for i in 0..bus_size { + if masks[i] { + self.mem[addr_align as usize + i] = data[i]; + } + } + } +}
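read_mem_axi implements the AXI narrow-transfer rule: when the request size is smaller than the bus width, the returned beat is still bus-sized and the payload sits at the in-beat offset addr % bus_size. A worked, standalone illustration of that padding (sizes here are illustrative, not tied to any particular bus in this patch):

```rust
// Standalone illustration of the narrow-transfer padding in read_mem_axi:
// a 4-byte read on a 16-byte bus lands at offset (addr % bus_size) inside
// a zero-padded, bus-sized beat.
fn narrow_read(mem: &[u8], addr: usize, size: usize, bus_size: usize) -> Vec<u8> {
    assert!(addr % size == 0 && bus_size % size == 0, "unaligned access");
    let data = &mem[addr..addr + size];
    let mut beat = vec![0u8; bus_size];
    let start = addr % bus_size;
    beat[start..start + size].copy_from_slice(data);
    beat
}

fn main() {
    let mem: Vec<u8> = (0u8..32).collect();
    let beat = narrow_read(&mem, 4, 4, 16);
    // Bytes 4..8 of memory appear at offset 4 of the beat; the rest is zero.
    assert_eq!(&beat[4..8], &[4, 5, 6, 7]);
    assert_eq!(&beat[0..4], &[0, 0, 0, 0]);
}
```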
+ +#[derive(Debug)] +#[allow(dead_code)] +pub struct FunctionSym { + #[allow(dead_code)] + pub(crate) name: String, + #[allow(dead_code)] + pub(crate) info: u8, +} +pub type FunctionSymTab = HashMap<u64, FunctionSym>; + +pub(crate) struct Driver { + // SvScope from t1rocket_cosim_init + scope: SvScope, + + #[cfg(feature = "trace")] + wave_path: String, + #[cfg(feature = "trace")] + dump_start: u64, + #[cfg(feature = "trace")] + dump_end: u64, + #[cfg(feature = "trace")] + dump_started: bool, + + pub(crate) dlen: u32, + pub(crate) e_entry: u64, + + timeout: u64, + last_commit_cycle: u64, + + shadow_mem: ShadowMem, +} + +#[cfg(feature = "trace")] +fn parse_range(input: &str) -> (u64, u64) { + if input.is_empty() { + return (0, 0); + } + + let parts: Vec<&str> = input.split(",").collect(); + + if parts.len() != 1 && parts.len() != 2 { + error!("invalid dump wave range: `{input}` was given"); + return (0, 0); + } + + const INVALID_NUMBER: &'static str = "invalid number"; + + if parts.len() == 1 { + return (parts[0].parse().expect(INVALID_NUMBER), 0); + } + + if parts[0].is_empty() { + return (0, parts[1].parse().expect(INVALID_NUMBER)); + } + + let start = parts[0].parse().expect(INVALID_NUMBER); + let end = parts[1].parse().expect(INVALID_NUMBER); + if start > end { + panic!("dump start is larger than end: `{input}`"); + } + + (start, end) +} + +impl Driver { + pub(crate) fn new(scope: SvScope, args: &OfflineArgs) -> Self { + #[cfg(feature = "trace")] + let (dump_start, dump_end) = parse_range(&args.dump_range); + + // pass e_entry to rocket + let (e_entry, shadow_mem, _fn_sym_tab) = + Self::load_elf(&args.common_args.elf_file).expect("fail creating simulator"); + + Self { + scope, + + #[cfg(feature = "trace")] + wave_path: args.wave_path.to_owned(), + #[cfg(feature = "trace")] + dump_start, + #[cfg(feature = "trace")] + dump_end, + #[cfg(feature = "trace")] + dump_started: false, + + dlen: args.common_args.dlen, + e_entry, + + timeout: args.timeout, + last_commit_cycle: 0, + + shadow_mem, + } + } + + pub fn load_elf(path: &Path) -> anyhow::Result<(u64, ShadowMem, FunctionSymTab)> { + let file = fs::File::open(path).with_context(|| "reading ELF file")?; + let mut elf: ElfStream<LittleEndian, _> = + ElfStream::open_stream(&file).with_context(|| "parsing ELF file")?; + + if elf.ehdr.e_machine != EM_RISCV { + anyhow::bail!("ELF is not RISC-V"); + } + + if elf.ehdr.e_type != ET_EXEC { + anyhow::bail!("ELF is not an executable"); + } + + if elf.ehdr.e_phnum == 0 { + anyhow::bail!("ELF has no program headers"); + } + + debug!("ELF entry: 0x{:x}", elf.ehdr.e_entry); + let mut mem = ShadowMem::new(); + elf.segments().iter().filter(|phdr| phdr.p_type == PT_LOAD).for_each(|phdr| { + let vaddr: usize = phdr.p_vaddr.try_into().expect("fail converting vaddr(u64) to usize"); + let filesz: usize = phdr.p_filesz.try_into().expect("fail converting p_filesz(u64) to usize"); + debug!( + "Read loadable segments 0x{:x}..0x{:x} to memory 0x{:x}", + phdr.p_offset, + phdr.p_offset + filesz as u64, + vaddr + ); + + // Load file start from offset into given mem slice + // The `offset` of the read_at method is relative to the start of the file and thus independent of the current cursor. + let mem_slice = &mut mem.mem[vaddr..vaddr + filesz]; + file.read_at(mem_slice, phdr.p_offset).unwrap_or_else(|err| { + panic!( + "fail reading ELF into mem with vaddr={}, filesz={}, offset={}. Error detail: {}", + vaddr, filesz, phdr.p_offset, err + ) + }); + }); + + // FIXME: now the symbol table doesn't contain any function value + let mut fn_sym_tab = FunctionSymTab::new(); + let symbol_table = + elf.symbol_table().with_context(|| "reading symbol table(SHT_SYMTAB) from ELF")?; + if let Some((parsed_table, string_table)) = symbol_table { + parsed_table + .iter() + // st_symtype = symbol.st_info & 0xf (But why masking here?)
+ .filter(|sym| sym.st_symtype() == STT_FUNC) + .for_each(|sym| { + let name = string_table + .get(sym.st_name as usize) + .unwrap_or_else(|_| panic!("fail to get name at st_name={}", sym.st_name)); + fn_sym_tab.insert( + sym.st_value, + FunctionSym { name: name.to_string(), info: sym.st_symtype() }, + ); + }); + } else { + debug!("load_elf: symtab not found"); + }; + + Ok((elf.ehdr.e_entry, mem, fn_sym_tab)) + } + + pub(crate) fn axi_read_high_bandwidth(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + let data = self.shadow_mem.read_mem_axi(addr, size, self.dlen / 8); + let data_hex = hex::encode(&data); + self.last_commit_cycle = get_t(); + trace!( + "[{}] axi_read_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn axi_write_high_bandwidth( + &mut self, + addr: u32, + awsize: u64, + strobe: &[bool], + data: &[u8], + ) { + let size = 1 << awsize; + self.shadow_mem.write_mem_axi(addr, size, self.dlen / 8, &strobe, data); + let data_hex = hex::encode(data); + self.last_commit_cycle = get_t(); + trace!( + "[{}] axi_write_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + } + + pub(crate) fn axi_read_high_outstanding(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + assert!(size <= 4); + let data = self.shadow_mem.read_mem_axi(addr, size, 4); + let data_hex = hex::encode(&data); + self.last_commit_cycle = get_t(); + trace!( + "[{}] axi_read_high_outstanding (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn axi_write_high_outstanding( + &mut self, + addr: u32, + awsize: u64, + strobe: &[bool], + data: &[u8], + ) { + let size = 1 << awsize; + self.shadow_mem.write_mem_axi(addr, size, 4, strobe, data); + let data_hex = hex::encode(data); + self.last_commit_cycle = get_t(); + trace!( + "[{}] axi_write_high_outstanding (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + } + + pub(crate) fn axi_read_load_store(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + let data = self.shadow_mem.read_mem_axi(addr, size, 32); + let data_hex = hex::encode(&data); + self.last_commit_cycle = get_t(); + trace!( + "[{}] axi_read_load_store (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn axi_write_load_store( + &mut self, + addr: u32, + awsize: u64, + strobe: &[bool], + data: &[u8], + ) { + let size = 1 << awsize; + self.shadow_mem.write_mem_axi(addr, size, 32, strobe, data); + let data_hex = hex::encode(data); + self.last_commit_cycle = get_t(); + + // exit with code + if addr == EXIT_POS && data.len() == 4 && data == &EXIT_CODE.to_le_bytes() { + info!("exit successfully"); + quit(); + return; + } + + trace!( + "[{}] axi_write_load_store (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + } + + pub(crate) fn axi_read_instruction_fetch(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + let data = self.shadow_mem.read_mem_axi(addr, size, 32); + let data_hex = hex::encode(&data); + trace!( + "[{}] axi_read_instruction_fetch (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn watchdog(&mut self) -> u8 { + const WATCHDOG_CONTINUE: u8 = 0; + const WATCHDOG_TIMEOUT: u8 = 1; + + let tick = get_t(); + if tick - self.last_commit_cycle > self.timeout { + error!( + "[{}] watchdog 
timeout (last_commit_cycle={})", + get_t(), + self.last_commit_cycle + ); + WATCHDOG_TIMEOUT + } else { + #[cfg(feature = "trace")] + if self.dump_end != 0 && tick > self.dump_end { + info!( + "[{tick}] run to dump end, exiting (last_commit_cycle={})", + self.last_commit_cycle + ); + return WATCHDOG_TIMEOUT; + } + + #[cfg(feature = "trace")] + if !self.dump_started && tick >= self.dump_start { + self.start_dump_wave(); + self.dump_started = true; + } + + trace!("[{}] watchdog continue", get_t()); + WATCHDOG_CONTINUE + } + } + + #[cfg(feature = "trace")] + fn start_dump_wave(&mut self) { + dump_wave(self.scope, &self.wave_path); + } +} diff --git a/t1rocketemu/online_dpi/src/lib.rs b/t1rocketemu/online_dpi/src/lib.rs new file mode 100644 index 000000000..35a72ec33 --- /dev/null +++ b/t1rocketemu/online_dpi/src/lib.rs @@ -0,0 +1,48 @@ +use clap::Parser; +use common::CommonArgs; + +pub mod dpi; +pub mod drive; +pub mod svdpi; +#[cfg(feature = "svvpi")] +pub mod svvpi; + +#[derive(Parser)] +pub(crate) struct OfflineArgs { + #[command(flatten)] + pub common_args: CommonArgs, + + #[cfg(feature = "trace")] + #[arg(long)] + pub wave_path: String, + + #[cfg(feature = "trace")] + #[arg(long, default_value = "")] + pub dump_range: String, + + #[arg(long, default_value_t = 1000000)] + pub timeout: u64, +} + +// quit signal +const EXIT_POS: u32 = 0x4000_0000; +const EXIT_CODE: u32 = 0xdead_beef; + +// keep in sync with TestBench.ClockGen +pub const CYCLE_PERIOD: u64 = 20; + +/// get cycle +#[cfg(any(feature = "sv2023", feature = "svvpi"))] +pub fn get_t() -> u64 { + get_time() / CYCLE_PERIOD +} + +#[cfg(feature = "sv2023")] +pub fn get_time() -> u64 { + svdpi::get_time() +} + +#[cfg(all(not(feature = "sv2023"), feature = "svvpi"))] +pub fn get_time() -> u64 { + svvpi::get_time() +} \ No newline at end of file diff --git a/t1rocketemu/online_dpi/src/svdpi.rs b/t1rocketemu/online_dpi/src/svdpi.rs new file mode 100644 index 000000000..227626d79 --- /dev/null +++ b/t1rocketemu/online_dpi/src/svdpi.rs @@ -0,0 +1,50 @@ +use std::{ffi::{c_void, CString}, ptr::{self, NonNull}}; + +#[rustfmt::skip] +pub mod sys; + +/// get current simulation time in _simulation time unit_ +#[cfg(feature = "sv2023")] +pub fn get_time() -> u64 { + let mut time = sys::svTimeVal { + type_: sys::sv_sim_time as i32, + high: 0, + low: 0, + real: 0.0, + }; + unsafe { + let ret = sys::svGetTime(ptr::null_mut(), &mut time); + assert!(ret == 0, "svGetTime failed"); + } + + ((time.high as u64) << 32) + (time.low as u64) +} + +pub fn set_scope_by_name(name: &str) { + let name_cstr = CString::new(name).unwrap(); + unsafe { + let scope = sys::svGetScopeFromName(name_cstr.as_ptr()); + assert!(!scope.is_null(), "unrecognized scope `{name}`"); + sys::svSetScope(scope); + } +} + +pub fn set_scope(scope: SvScope) { + unsafe { + sys::svSetScope(scope.ptr.as_ptr()); + } +} + +#[derive(Debug, Clone, Copy)] +pub struct SvScope { + ptr: NonNull<c_void>, +} + +unsafe impl Send for SvScope {} + +impl SvScope { + pub fn get_current() -> Option<Self> { + let ptr = unsafe { sys::svGetScope() }; + NonNull::new(ptr).map(|ptr| Self { ptr }) + } +}
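lib.rs above derives the cycle counter by dividing simulator time by CYCLE_PERIOD, which must stay in sync with the TestBench clock generator. A trivial standalone check of that arithmetic (the real get_time comes from the DPI/VPI layer, so it is modeled here as a plain argument):

```rust
// Standalone check of the get_t derivation in lib.rs: with a 20-unit
// clock period, simulation time 1_000 corresponds to cycle 50.
const CYCLE_PERIOD: u64 = 20; // keep in sync with TestBench.ClockGen

fn get_t(sim_time: u64) -> u64 {
    sim_time / CYCLE_PERIOD
}

fn main() {
    assert_eq!(get_t(1_000), 50);
}
```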
diff --git a/t1rocketemu/online_dpi/src/svdpi/sys.rs b/t1rocketemu/online_dpi/src/svdpi/sys.rs new file mode 100644 index 000000000..892d7534b --- /dev/null +++ b/t1rocketemu/online_dpi/src/svdpi/sys.rs @@ -0,0 +1,750 @@ +// modified from `bindgen --allowlist-item 'sv.*' svdpi.h` +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +/* automatically generated by rust-bindgen 0.69.4 */ + +pub const sv_0: u32 = 0; +pub const sv_1: u32 = 1; +pub const sv_z: u32 = 2; +pub const sv_x: u32 = 3; +pub const sv_scaled_real_time: u32 = 1; +pub const sv_sim_time: u32 = 2; +pub type svScalar = u8; +pub type svBit = svScalar; +pub type svLogic = svScalar; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_vecval { + pub aval: u32, + pub bval: u32, +} +#[test] +fn bindgen_test_layout_t_vpi_vecval() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_vecval> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_vecval>(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_vecval>(), + 4usize, + concat!("Alignment of ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).aval) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(aval) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).bval) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(bval) + ) + ); +} +pub type s_vpi_vecval = t_vpi_vecval; +pub type svLogicVecVal = s_vpi_vecval; +pub type svBitVecVal = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_time { + pub type_: i32, + pub high: u32, + pub low: u32, + pub real: f64, +} +#[test] +fn bindgen_test_layout_t_vpi_time() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_time> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_time>(), + 24usize, + concat!("Size of: ", stringify!(t_vpi_time)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_time>(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_time)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).high) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(high) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).low) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(low) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).real) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(real) + ) + ); +} +pub type s_vpi_time = t_vpi_time; +pub type svTimeVal = s_vpi_time; +extern "C" { + pub fn svDpiVersion() -> *const ::std::os::raw::c_char; +} +pub type svScope = *mut ::std::os::raw::c_void; +pub type svOpenArrayHandle = *mut ::std::os::raw::c_void; +extern "C" { + pub fn svGetBitselBit(s: *const svBitVecVal, i: ::std::os::raw::c_int) -> svBit; +} +extern "C" { + pub fn svGetBitselLogic(s: *const svLogicVecVal, i: ::std::os::raw::c_int) -> svLogic; +} +extern "C" { + pub fn svPutBitselBit(d: *mut svBitVecVal, i: ::std::os::raw::c_int, s: svBit); +} +extern "C" { + pub fn svPutBitselLogic(d: *mut svLogicVecVal, i: ::std::os::raw::c_int, s: svLogic); +} +extern "C" { + pub fn svGetPartselBit( + d: *mut svBitVecVal, + s: *const svBitVecVal, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetPartselLogic( + d: *mut svLogicVecVal, + s: *const svLogicVecVal, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartselBit( + d: *mut svBitVecVal, + s:
svBitVecVal, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartselLogic( + d: *mut svLogicVecVal, + s: svLogicVecVal, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svLeft(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svRight(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svLow(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svHigh(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svIncrement(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svSize(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svDimensions(h: svOpenArrayHandle) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svGetArrayPtr(arg1: svOpenArrayHandle) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svSizeOfArray(arg1: svOpenArrayHandle) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svGetArrElemPtr( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetArrElemPtr1( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetArrElemPtr2( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetArrElemPtr3( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svPutBitArrElemVecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutBitArrElem1VecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem2VecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem3VecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElemVecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutLogicArrElem1VecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem2VecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem3VecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElemVecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... 
+ ); +} +extern "C" { + pub fn svGetBitArrElem1VecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem2VecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem3VecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElemVecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svGetLogicArrElem1VecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem2VecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem3VecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int, ...) -> svBit; +} +extern "C" { + pub fn svGetBitArrElem1(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int) -> svBit; +} +extern "C" { + pub fn svGetBitArrElem2( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ) -> svBit; +} +extern "C" { + pub fn svGetBitArrElem3( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ) -> svBit; +} +extern "C" { + pub fn svGetLogicArrElem(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int, ...) -> svLogic; +} +extern "C" { + pub fn svGetLogicArrElem1(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int) -> svLogic; +} +extern "C" { + pub fn svGetLogicArrElem2( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ) -> svLogic; +} +extern "C" { + pub fn svGetLogicArrElem3( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ) -> svLogic; +} +extern "C" { + pub fn svPutLogicArrElem( + d: svOpenArrayHandle, + value: svLogic, + indx1: ::std::os::raw::c_int, + ... 
+ ); +} +extern "C" { + pub fn svPutLogicArrElem1(d: svOpenArrayHandle, value: svLogic, indx1: ::std::os::raw::c_int); +} +extern "C" { + pub fn svPutLogicArrElem2( + d: svOpenArrayHandle, + value: svLogic, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem3( + d: svOpenArrayHandle, + value: svLogic, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem(d: svOpenArrayHandle, value: svBit, indx1: ::std::os::raw::c_int, ...); +} +extern "C" { + pub fn svPutBitArrElem1(d: svOpenArrayHandle, value: svBit, indx1: ::std::os::raw::c_int); +} +extern "C" { + pub fn svPutBitArrElem2( + d: svOpenArrayHandle, + value: svBit, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem3( + d: svOpenArrayHandle, + value: svBit, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetScope() -> svScope; +} +extern "C" { + pub fn svSetScope(scope: svScope) -> svScope; +} +extern "C" { + pub fn svGetNameFromScope(arg1: svScope) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn svGetScopeFromName(scopeName: *const ::std::os::raw::c_char) -> svScope; +} +extern "C" { + pub fn svPutUserData( + scope: svScope, + userKey: *mut ::std::os::raw::c_void, + userData: *mut ::std::os::raw::c_void, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svGetUserData( + scope: svScope, + userKey: *mut ::std::os::raw::c_void, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetCallerInfo( + fileName: *mut *const ::std::os::raw::c_char, + lineNumber: *mut ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svIsDisabledState() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svAckDisabledState(); +} +#[cfg(feature = "sv2023")] +extern "C" { + pub fn svGetTime(scope: svScope, time: *mut svTimeVal) -> ::std::os::raw::c_int; +} +#[cfg(feature = "sv2023")] +extern "C" { + pub fn svGetTimeUnit(scope: svScope, time_unit: *mut i32) -> ::std::os::raw::c_int; +} +#[cfg(feature = "sv2023")] +extern "C" { + pub fn svGetTimePrecision(scope: svScope, time_precision: *mut i32) -> ::std::os::raw::c_int; +} +pub type svBitVec32 = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct svLogicVec32 { + pub c: ::std::os::raw::c_uint, + pub d: ::std::os::raw::c_uint, +} +#[test] +fn bindgen_test_layout_svLogicVec32() { + const UNINIT: ::std::mem::MaybeUninit<svLogicVec32> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<svLogicVec32>(), + 8usize, + concat!("Size of: ", stringify!(svLogicVec32)) + ); + assert_eq!( + ::std::mem::align_of::<svLogicVec32>(), + 4usize, + concat!("Alignment of ", stringify!(svLogicVec32)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).c) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(svLogicVec32), + "::", + stringify!(c) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).d) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(svLogicVec32), + "::", + stringify!(d) + ) + ); +} +pub type svBitPackedArrRef = *mut ::std::os::raw::c_void; +pub type svLogicPackedArrRef = *mut ::std::os::raw::c_void; +extern "C" { + pub fn svSizeOfBitPackedArr(width: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svSizeOfLogicPackedArr(width: 
::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svPutBitVec32(d: svBitPackedArrRef, s: *const svBitVec32, w: ::std::os::raw::c_int); +} +extern "C" { + pub fn svPutLogicVec32( + d: svLogicPackedArrRef, + s: *const svLogicVec32, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitVec32(d: *mut svBitVec32, s: svBitPackedArrRef, w: ::std::os::raw::c_int); +} +extern "C" { + pub fn svGetLogicVec32(d: *mut svLogicVec32, s: svLogicPackedArrRef, w: ::std::os::raw::c_int); +} +extern "C" { + pub fn svGetSelectBit(s: svBitPackedArrRef, i: ::std::os::raw::c_int) -> svBit; +} +extern "C" { + pub fn svGetSelectLogic(s: svLogicPackedArrRef, i: ::std::os::raw::c_int) -> svLogic; +} +extern "C" { + pub fn svPutSelectBit(d: svBitPackedArrRef, i: ::std::os::raw::c_int, s: svBit); +} +extern "C" { + pub fn svPutSelectLogic(d: svLogicPackedArrRef, i: ::std::os::raw::c_int, s: svLogic); +} +extern "C" { + pub fn svGetPartSelectBit( + d: *mut svBitVec32, + s: svBitPackedArrRef, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBits( + s: svBitPackedArrRef, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ) -> svBitVec32; +} +extern "C" { + pub fn svGet32Bits(s: svBitPackedArrRef, i: ::std::os::raw::c_int) -> svBitVec32; +} +extern "C" { + pub fn svGet64Bits(s: svBitPackedArrRef, i: ::std::os::raw::c_int) -> u64; +} +extern "C" { + pub fn svGetPartSelectLogic( + d: *mut svLogicVec32, + s: svLogicPackedArrRef, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartSelectBit( + d: svBitPackedArrRef, + s: svBitVec32, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartSelectLogic( + d: svLogicPackedArrRef, + s: *const svLogicVec32, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElemVec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutBitArrElem1Vec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem2Vec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem3Vec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElemVec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutLogicArrElem1Vec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem2Vec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem3Vec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElemVec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... 
+ ); +} +extern "C" { + pub fn svGetBitArrElem1Vec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem2Vec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem3Vec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElemVec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svGetLogicArrElem1Vec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem2Vec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem3Vec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} diff --git a/t1rocketemu/online_dpi/src/svvpi.rs b/t1rocketemu/online_dpi/src/svvpi.rs new file mode 100644 index 000000000..401f7f65a --- /dev/null +++ b/t1rocketemu/online_dpi/src/svvpi.rs @@ -0,0 +1,18 @@ +#[rustfmt::skip] +pub mod sys; + +use std::ptr; + +/// Get the current simulation time, in the _simulation time unit_. +pub fn get_time() -> u64 { + let mut time = sys::s_vpi_time { + type_: sys::vpiSimTime as i32, + high: 0, + low: 0, + real: 0.0, + }; + unsafe { + sys::vpi_get_time(ptr::null_mut(), &mut time); + } + ((time.high as u64) << 32) + (time.low as u64) +} diff --git a/t1rocketemu/online_dpi/src/svvpi/sys.rs b/t1rocketemu/online_dpi/src/svvpi/sys.rs new file mode 100644 index 000000000..c3d269855 --- /dev/null +++ b/t1rocketemu/online_dpi/src/svvpi/sys.rs @@ -0,0 +1,2102 @@ +// modified from `bindgen --allowlist-item 'vpi.*' sv_vpi_user.h` +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +/* automatically generated by rust-bindgen 0.69.4 */ + +pub const vpiAlways: u32 = 1; +pub const vpiAssignStmt: u32 = 2; +pub const vpiAssignment: u32 = 3; +pub const vpiBegin: u32 = 4; +pub const vpiCase: u32 = 5; +pub const vpiCaseItem: u32 = 6; +pub const vpiConstant: u32 = 7; +pub const vpiContAssign: u32 = 8; +pub const vpiDeassign: u32 = 9; +pub const vpiDefParam: u32 = 10; +pub const vpiDelayControl: u32 = 11; +pub const vpiDisable: u32 = 12; +pub const vpiEventControl: u32 = 13; +pub const vpiEventStmt: u32 = 14; +pub const vpiFor: u32 = 15; +pub const vpiForce: u32 = 16; +pub const vpiForever: u32 = 17; +pub const vpiFork: u32 = 18; +pub const vpiFuncCall: u32 = 19; +pub const vpiFunction: u32 = 20; +pub const vpiGate: u32 = 21; +pub const vpiIf: u32 = 22; +pub const vpiIfElse: u32 = 23; +pub const vpiInitial: u32 = 24; +pub const vpiIntegerVar: u32 = 25; +pub const vpiInterModPath: u32 = 26; +pub const vpiIterator: u32 = 27; +pub const vpiIODecl: u32 = 28; +pub const vpiMemory: u32 = 29; +pub const vpiMemoryWord: u32 = 30; +pub const vpiModPath: u32 = 31; +pub const vpiModule: u32 = 32; +pub const vpiNamedBegin: u32 = 33; +pub const vpiNamedEvent: u32 = 34; +pub const vpiNamedFork: u32 = 35; +pub const vpiNet: u32 = 36; +pub const vpiNetBit: u32 = 37; +pub const vpiNullStmt: u32 = 38; +pub const vpiOperation: u32 = 39; +pub const vpiParamAssign: u32 = 40; +pub const vpiParameter: u32 = 41; +pub const vpiPartSelect: u32 
= 42; +pub const vpiPathTerm: u32 = 43; +pub const vpiPort: u32 = 44; +pub const vpiPortBit: u32 = 45; +pub const vpiPrimTerm: u32 = 46; +pub const vpiRealVar: u32 = 47; +pub const vpiReg: u32 = 48; +pub const vpiRegBit: u32 = 49; +pub const vpiRelease: u32 = 50; +pub const vpiRepeat: u32 = 51; +pub const vpiRepeatControl: u32 = 52; +pub const vpiSchedEvent: u32 = 53; +pub const vpiSpecParam: u32 = 54; +pub const vpiSwitch: u32 = 55; +pub const vpiSysFuncCall: u32 = 56; +pub const vpiSysTaskCall: u32 = 57; +pub const vpiTableEntry: u32 = 58; +pub const vpiTask: u32 = 59; +pub const vpiTaskCall: u32 = 60; +pub const vpiTchk: u32 = 61; +pub const vpiTchkTerm: u32 = 62; +pub const vpiTimeVar: u32 = 63; +pub const vpiTimeQueue: u32 = 64; +pub const vpiUdp: u32 = 65; +pub const vpiUdpDefn: u32 = 66; +pub const vpiUserSystf: u32 = 67; +pub const vpiVarSelect: u32 = 68; +pub const vpiWait: u32 = 69; +pub const vpiWhile: u32 = 70; +pub const vpiAttribute: u32 = 105; +pub const vpiBitSelect: u32 = 106; +pub const vpiCallback: u32 = 107; +pub const vpiDelayTerm: u32 = 108; +pub const vpiDelayDevice: u32 = 109; +pub const vpiFrame: u32 = 110; +pub const vpiGateArray: u32 = 111; +pub const vpiModuleArray: u32 = 112; +pub const vpiPrimitiveArray: u32 = 113; +pub const vpiNetArray: u32 = 114; +pub const vpiRange: u32 = 115; +pub const vpiRegArray: u32 = 116; +pub const vpiSwitchArray: u32 = 117; +pub const vpiUdpArray: u32 = 118; +pub const vpiContAssignBit: u32 = 128; +pub const vpiNamedEventArray: u32 = 129; +pub const vpiIndexedPartSelect: u32 = 130; +pub const vpiGenScopeArray: u32 = 133; +pub const vpiGenScope: u32 = 134; +pub const vpiGenVar: u32 = 135; +pub const vpiCondition: u32 = 71; +pub const vpiDelay: u32 = 72; +pub const vpiElseStmt: u32 = 73; +pub const vpiForIncStmt: u32 = 74; +pub const vpiForInitStmt: u32 = 75; +pub const vpiHighConn: u32 = 76; +pub const vpiLhs: u32 = 77; +pub const vpiIndex: u32 = 78; +pub const vpiLeftRange: u32 = 79; +pub const vpiLowConn: u32 = 80; +pub const vpiParent: u32 = 81; +pub const vpiRhs: u32 = 82; +pub const vpiRightRange: u32 = 83; +pub const vpiScope: u32 = 84; +pub const vpiSysTfCall: u32 = 85; +pub const vpiTchkDataTerm: u32 = 86; +pub const vpiTchkNotifier: u32 = 87; +pub const vpiTchkRefTerm: u32 = 88; +pub const vpiArgument: u32 = 89; +pub const vpiBit: u32 = 90; +pub const vpiDriver: u32 = 91; +pub const vpiInternalScope: u32 = 92; +pub const vpiLoad: u32 = 93; +pub const vpiModDataPathIn: u32 = 94; +pub const vpiModPathIn: u32 = 95; +pub const vpiModPathOut: u32 = 96; +pub const vpiOperand: u32 = 97; +pub const vpiPortInst: u32 = 98; +pub const vpiProcess: u32 = 99; +pub const vpiVariables: u32 = 100; +pub const vpiUse: u32 = 101; +pub const vpiExpr: u32 = 102; +pub const vpiPrimitive: u32 = 103; +pub const vpiStmt: u32 = 104; +pub const vpiActiveTimeFormat: u32 = 119; +pub const vpiInTerm: u32 = 120; +pub const vpiInstanceArray: u32 = 121; +pub const vpiLocalDriver: u32 = 122; +pub const vpiLocalLoad: u32 = 123; +pub const vpiOutTerm: u32 = 124; +pub const vpiPorts: u32 = 125; +pub const vpiSimNet: u32 = 126; +pub const vpiTaskFunc: u32 = 127; +pub const vpiBaseExpr: u32 = 131; +pub const vpiWidthExpr: u32 = 132; +pub const vpiAutomatics: u32 = 136; +pub const vpiUndefined: i32 = -1; +pub const vpiType: u32 = 1; +pub const vpiName: u32 = 2; +pub const vpiFullName: u32 = 3; +pub const vpiSize: u32 = 4; +pub const vpiFile: u32 = 5; +pub const vpiLineNo: u32 = 6; +pub const vpiTopModule: u32 = 7; +pub const vpiCellInstance: u32 = 8; +pub const 
vpiDefName: u32 = 9; +pub const vpiProtected: u32 = 10; +pub const vpiTimeUnit: u32 = 11; +pub const vpiTimePrecision: u32 = 12; +pub const vpiDefNetType: u32 = 13; +pub const vpiUnconnDrive: u32 = 14; +pub const vpiHighZ: u32 = 1; +pub const vpiPull1: u32 = 2; +pub const vpiPull0: u32 = 3; +pub const vpiDefFile: u32 = 15; +pub const vpiDefLineNo: u32 = 16; +pub const vpiDefDelayMode: u32 = 47; +pub const vpiDelayModeNone: u32 = 1; +pub const vpiDelayModePath: u32 = 2; +pub const vpiDelayModeDistrib: u32 = 3; +pub const vpiDelayModeUnit: u32 = 4; +pub const vpiDelayModeZero: u32 = 5; +pub const vpiDelayModeMTM: u32 = 6; +pub const vpiDefDecayTime: u32 = 48; +pub const vpiScalar: u32 = 17; +pub const vpiVector: u32 = 18; +pub const vpiExplicitName: u32 = 19; +pub const vpiDirection: u32 = 20; +pub const vpiInput: u32 = 1; +pub const vpiOutput: u32 = 2; +pub const vpiInout: u32 = 3; +pub const vpiMixedIO: u32 = 4; +pub const vpiNoDirection: u32 = 5; +pub const vpiConnByName: u32 = 21; +pub const vpiNetType: u32 = 22; +pub const vpiWire: u32 = 1; +pub const vpiWand: u32 = 2; +pub const vpiWor: u32 = 3; +pub const vpiTri: u32 = 4; +pub const vpiTri0: u32 = 5; +pub const vpiTri1: u32 = 6; +pub const vpiTriReg: u32 = 7; +pub const vpiTriAnd: u32 = 8; +pub const vpiTriOr: u32 = 9; +pub const vpiSupply1: u32 = 10; +pub const vpiSupply0: u32 = 11; +pub const vpiNone: u32 = 12; +pub const vpiUwire: u32 = 13; +pub const vpiNettypeNet: u32 = 14; +pub const vpiNettypeNetSelect: u32 = 15; +pub const vpiInterconnect: u32 = 16; +pub const vpiExplicitScalared: u32 = 23; +pub const vpiExplicitVectored: u32 = 24; +pub const vpiExpanded: u32 = 25; +pub const vpiImplicitDecl: u32 = 26; +pub const vpiChargeStrength: u32 = 27; +pub const vpiArray: u32 = 28; +pub const vpiPortIndex: u32 = 29; +pub const vpiTermIndex: u32 = 30; +pub const vpiStrength0: u32 = 31; +pub const vpiStrength1: u32 = 32; +pub const vpiPrimType: u32 = 33; +pub const vpiAndPrim: u32 = 1; +pub const vpiNandPrim: u32 = 2; +pub const vpiNorPrim: u32 = 3; +pub const vpiOrPrim: u32 = 4; +pub const vpiXorPrim: u32 = 5; +pub const vpiXnorPrim: u32 = 6; +pub const vpiBufPrim: u32 = 7; +pub const vpiNotPrim: u32 = 8; +pub const vpiBufif0Prim: u32 = 9; +pub const vpiBufif1Prim: u32 = 10; +pub const vpiNotif0Prim: u32 = 11; +pub const vpiNotif1Prim: u32 = 12; +pub const vpiNmosPrim: u32 = 13; +pub const vpiPmosPrim: u32 = 14; +pub const vpiCmosPrim: u32 = 15; +pub const vpiRnmosPrim: u32 = 16; +pub const vpiRpmosPrim: u32 = 17; +pub const vpiRcmosPrim: u32 = 18; +pub const vpiRtranPrim: u32 = 19; +pub const vpiRtranif0Prim: u32 = 20; +pub const vpiRtranif1Prim: u32 = 21; +pub const vpiTranPrim: u32 = 22; +pub const vpiTranif0Prim: u32 = 23; +pub const vpiTranif1Prim: u32 = 24; +pub const vpiPullupPrim: u32 = 25; +pub const vpiPulldownPrim: u32 = 26; +pub const vpiSeqPrim: u32 = 27; +pub const vpiCombPrim: u32 = 28; +pub const vpiPolarity: u32 = 34; +pub const vpiDataPolarity: u32 = 35; +pub const vpiPositive: u32 = 1; +pub const vpiNegative: u32 = 2; +pub const vpiUnknown: u32 = 3; +pub const vpiEdge: u32 = 36; +pub const vpiNoEdge: u32 = 0; +pub const vpiEdge01: u32 = 1; +pub const vpiEdge10: u32 = 2; +pub const vpiEdge0x: u32 = 4; +pub const vpiEdgex1: u32 = 8; +pub const vpiEdge1x: u32 = 16; +pub const vpiEdgex0: u32 = 32; +pub const vpiPosedge: u32 = 13; +pub const vpiNegedge: u32 = 50; +pub const vpiAnyEdge: u32 = 63; +pub const vpiPathType: u32 = 37; +pub const vpiPathFull: u32 = 1; +pub const vpiPathParallel: u32 = 2; +pub const vpiTchkType: 
u32 = 38; +pub const vpiSetup: u32 = 1; +pub const vpiHold: u32 = 2; +pub const vpiPeriod: u32 = 3; +pub const vpiWidth: u32 = 4; +pub const vpiSkew: u32 = 5; +pub const vpiRecovery: u32 = 6; +pub const vpiNoChange: u32 = 7; +pub const vpiSetupHold: u32 = 8; +pub const vpiFullskew: u32 = 9; +pub const vpiRecrem: u32 = 10; +pub const vpiRemoval: u32 = 11; +pub const vpiTimeskew: u32 = 12; +pub const vpiOpType: u32 = 39; +pub const vpiMinusOp: u32 = 1; +pub const vpiPlusOp: u32 = 2; +pub const vpiNotOp: u32 = 3; +pub const vpiBitNegOp: u32 = 4; +pub const vpiUnaryAndOp: u32 = 5; +pub const vpiUnaryNandOp: u32 = 6; +pub const vpiUnaryOrOp: u32 = 7; +pub const vpiUnaryNorOp: u32 = 8; +pub const vpiUnaryXorOp: u32 = 9; +pub const vpiUnaryXNorOp: u32 = 10; +pub const vpiSubOp: u32 = 11; +pub const vpiDivOp: u32 = 12; +pub const vpiModOp: u32 = 13; +pub const vpiEqOp: u32 = 14; +pub const vpiNeqOp: u32 = 15; +pub const vpiCaseEqOp: u32 = 16; +pub const vpiCaseNeqOp: u32 = 17; +pub const vpiGtOp: u32 = 18; +pub const vpiGeOp: u32 = 19; +pub const vpiLtOp: u32 = 20; +pub const vpiLeOp: u32 = 21; +pub const vpiLShiftOp: u32 = 22; +pub const vpiRShiftOp: u32 = 23; +pub const vpiAddOp: u32 = 24; +pub const vpiMultOp: u32 = 25; +pub const vpiLogAndOp: u32 = 26; +pub const vpiLogOrOp: u32 = 27; +pub const vpiBitAndOp: u32 = 28; +pub const vpiBitOrOp: u32 = 29; +pub const vpiBitXorOp: u32 = 30; +pub const vpiBitXNorOp: u32 = 31; +pub const vpiBitXnorOp: u32 = 31; +pub const vpiConditionOp: u32 = 32; +pub const vpiConcatOp: u32 = 33; +pub const vpiMultiConcatOp: u32 = 34; +pub const vpiEventOrOp: u32 = 35; +pub const vpiNullOp: u32 = 36; +pub const vpiListOp: u32 = 37; +pub const vpiMinTypMaxOp: u32 = 38; +pub const vpiPosedgeOp: u32 = 39; +pub const vpiNegedgeOp: u32 = 40; +pub const vpiArithLShiftOp: u32 = 41; +pub const vpiArithRShiftOp: u32 = 42; +pub const vpiPowerOp: u32 = 43; +pub const vpiConstType: u32 = 40; +pub const vpiDecConst: u32 = 1; +pub const vpiRealConst: u32 = 2; +pub const vpiBinaryConst: u32 = 3; +pub const vpiOctConst: u32 = 4; +pub const vpiHexConst: u32 = 5; +pub const vpiStringConst: u32 = 6; +pub const vpiIntConst: u32 = 7; +pub const vpiTimeConst: u32 = 8; +pub const vpiBlocking: u32 = 41; +pub const vpiCaseType: u32 = 42; +pub const vpiCaseExact: u32 = 1; +pub const vpiCaseX: u32 = 2; +pub const vpiCaseZ: u32 = 3; +pub const vpiNetDeclAssign: u32 = 43; +pub const vpiFuncType: u32 = 44; +pub const vpiIntFunc: u32 = 1; +pub const vpiRealFunc: u32 = 2; +pub const vpiTimeFunc: u32 = 3; +pub const vpiSizedFunc: u32 = 4; +pub const vpiSizedSignedFunc: u32 = 5; +pub const vpiSysFuncType: u32 = 44; +pub const vpiSysFuncInt: u32 = 1; +pub const vpiSysFuncReal: u32 = 2; +pub const vpiSysFuncTime: u32 = 3; +pub const vpiSysFuncSized: u32 = 4; +pub const vpiUserDefn: u32 = 45; +pub const vpiScheduled: u32 = 46; +pub const vpiActive: u32 = 49; +pub const vpiAutomatic: u32 = 50; +pub const vpiCell: u32 = 51; +pub const vpiConfig: u32 = 52; +pub const vpiConstantSelect: u32 = 53; +pub const vpiDecompile: u32 = 54; +pub const vpiDefAttribute: u32 = 55; +pub const vpiDelayType: u32 = 56; +pub const vpiModPathDelay: u32 = 1; +pub const vpiInterModPathDelay: u32 = 2; +pub const vpiMIPDelay: u32 = 3; +pub const vpiIteratorType: u32 = 57; +pub const vpiLibrary: u32 = 58; +pub const vpiOffset: u32 = 60; +pub const vpiResolvedNetType: u32 = 61; +pub const vpiSaveRestartID: u32 = 62; +pub const vpiSaveRestartLocation: u32 = 63; +pub const vpiValid: u32 = 64; +pub const vpiValidFalse: u32 = 0; +pub 
const vpiValidTrue: u32 = 1; +pub const vpiSigned: u32 = 65; +pub const vpiLocalParam: u32 = 70; +pub const vpiModPathHasIfNone: u32 = 71; +pub const vpiIndexedPartSelectType: u32 = 72; +pub const vpiPosIndexed: u32 = 1; +pub const vpiNegIndexed: u32 = 2; +pub const vpiIsMemory: u32 = 73; +pub const vpiIsProtected: u32 = 74; +pub const vpiStop: u32 = 66; +pub const vpiFinish: u32 = 67; +pub const vpiReset: u32 = 68; +pub const vpiSetInteractiveScope: u32 = 69; +pub const vpiScaledRealTime: u32 = 1; +pub const vpiSimTime: u32 = 2; +pub const vpiSuppressTime: u32 = 3; +pub const vpiSupplyDrive: u32 = 128; +pub const vpiStrongDrive: u32 = 64; +pub const vpiPullDrive: u32 = 32; +pub const vpiWeakDrive: u32 = 8; +pub const vpiLargeCharge: u32 = 16; +pub const vpiMediumCharge: u32 = 4; +pub const vpiSmallCharge: u32 = 2; +pub const vpiHiZ: u32 = 1; +pub const vpiBinStrVal: u32 = 1; +pub const vpiOctStrVal: u32 = 2; +pub const vpiDecStrVal: u32 = 3; +pub const vpiHexStrVal: u32 = 4; +pub const vpiScalarVal: u32 = 5; +pub const vpiIntVal: u32 = 6; +pub const vpiRealVal: u32 = 7; +pub const vpiStringVal: u32 = 8; +pub const vpiVectorVal: u32 = 9; +pub const vpiStrengthVal: u32 = 10; +pub const vpiTimeVal: u32 = 11; +pub const vpiObjTypeVal: u32 = 12; +pub const vpiSuppressVal: u32 = 13; +pub const vpiShortIntVal: u32 = 14; +pub const vpiLongIntVal: u32 = 15; +pub const vpiShortRealVal: u32 = 16; +pub const vpiRawTwoStateVal: u32 = 17; +pub const vpiRawFourStateVal: u32 = 18; +pub const vpiNoDelay: u32 = 1; +pub const vpiInertialDelay: u32 = 2; +pub const vpiTransportDelay: u32 = 3; +pub const vpiPureTransportDelay: u32 = 4; +pub const vpiForceFlag: u32 = 5; +pub const vpiReleaseFlag: u32 = 6; +pub const vpiCancelEvent: u32 = 7; +pub const vpiReturnEvent: u32 = 4096; +pub const vpiUserAllocFlag: u32 = 8192; +pub const vpiOneValue: u32 = 16384; +pub const vpiPropagateOff: u32 = 32768; +pub const vpi0: u32 = 0; +pub const vpi1: u32 = 1; +pub const vpiZ: u32 = 2; +pub const vpiX: u32 = 3; +pub const vpiH: u32 = 4; +pub const vpiL: u32 = 5; +pub const vpiDontCare: u32 = 6; +pub const vpiSysTask: u32 = 1; +pub const vpiSysFunc: u32 = 2; +pub const vpiCompile: u32 = 1; +pub const vpiPLI: u32 = 2; +pub const vpiRun: u32 = 3; +pub const vpiNotice: u32 = 1; +pub const vpiWarning: u32 = 2; +pub const vpiError: u32 = 3; +pub const vpiSystem: u32 = 4; +pub const vpiInternal: u32 = 5; +pub const vpiPackage: u32 = 600; +pub const vpiInterface: u32 = 601; +pub const vpiProgram: u32 = 602; +pub const vpiInterfaceArray: u32 = 603; +pub const vpiProgramArray: u32 = 604; +pub const vpiTypespec: u32 = 605; +pub const vpiModport: u32 = 606; +pub const vpiInterfaceTfDecl: u32 = 607; +pub const vpiRefObj: u32 = 608; +pub const vpiTypeParameter: u32 = 609; +pub const vpiVarBit: u32 = 49; +pub const vpiLongIntVar: u32 = 610; +pub const vpiShortIntVar: u32 = 611; +pub const vpiIntVar: u32 = 612; +pub const vpiShortRealVar: u32 = 613; +pub const vpiByteVar: u32 = 614; +pub const vpiClassVar: u32 = 615; +pub const vpiStringVar: u32 = 616; +pub const vpiEnumVar: u32 = 617; +pub const vpiStructVar: u32 = 618; +pub const vpiUnionVar: u32 = 619; +pub const vpiBitVar: u32 = 620; +pub const vpiLogicVar: u32 = 48; +pub const vpiArrayVar: u32 = 116; +pub const vpiClassObj: u32 = 621; +pub const vpiChandleVar: u32 = 622; +pub const vpiPackedArrayVar: u32 = 623; +pub const vpiVirtualInterfaceVar: u32 = 728; +pub const vpiLongIntTypespec: u32 = 625; +pub const vpiShortRealTypespec: u32 = 626; +pub const vpiByteTypespec: u32 = 627; +pub 
const vpiShortIntTypespec: u32 = 628; +pub const vpiIntTypespec: u32 = 629; +pub const vpiClassTypespec: u32 = 630; +pub const vpiStringTypespec: u32 = 631; +pub const vpiChandleTypespec: u32 = 632; +pub const vpiEnumTypespec: u32 = 633; +pub const vpiEnumConst: u32 = 634; +pub const vpiIntegerTypespec: u32 = 635; +pub const vpiTimeTypespec: u32 = 636; +pub const vpiRealTypespec: u32 = 637; +pub const vpiStructTypespec: u32 = 638; +pub const vpiUnionTypespec: u32 = 639; +pub const vpiBitTypespec: u32 = 640; +pub const vpiLogicTypespec: u32 = 641; +pub const vpiArrayTypespec: u32 = 642; +pub const vpiVoidTypespec: u32 = 643; +pub const vpiTypespecMember: u32 = 644; +pub const vpiPackedArrayTypespec: u32 = 692; +pub const vpiSequenceTypespec: u32 = 696; +pub const vpiPropertyTypespec: u32 = 697; +pub const vpiEventTypespec: u32 = 698; +pub const vpiInterfaceTypespec: u32 = 906; +pub const vpiClockingBlock: u32 = 650; +pub const vpiClockingIODecl: u32 = 651; +pub const vpiClassDefn: u32 = 652; +pub const vpiConstraint: u32 = 653; +pub const vpiConstraintOrdering: u32 = 654; +pub const vpiDistItem: u32 = 645; +pub const vpiAliasStmt: u32 = 646; +pub const vpiThread: u32 = 647; +pub const vpiMethodFuncCall: u32 = 648; +pub const vpiMethodTaskCall: u32 = 649; +pub const vpiAssert: u32 = 686; +pub const vpiAssume: u32 = 687; +pub const vpiCover: u32 = 688; +pub const vpiRestrict: u32 = 901; +pub const vpiDisableCondition: u32 = 689; +pub const vpiClockingEvent: u32 = 690; +pub const vpiPropertyDecl: u32 = 655; +pub const vpiPropertySpec: u32 = 656; +pub const vpiPropertyExpr: u32 = 657; +pub const vpiMulticlockSequenceExpr: u32 = 658; +pub const vpiClockedSeq: u32 = 659; +pub const vpiClockedProp: u32 = 902; +pub const vpiPropertyInst: u32 = 660; +pub const vpiSequenceDecl: u32 = 661; +pub const vpiCaseProperty: u32 = 662; +pub const vpiCasePropertyItem: u32 = 905; +pub const vpiSequenceInst: u32 = 664; +pub const vpiImmediateAssert: u32 = 665; +pub const vpiImmediateAssume: u32 = 694; +pub const vpiImmediateCover: u32 = 695; +pub const vpiReturn: u32 = 666; +pub const vpiAnyPattern: u32 = 667; +pub const vpiTaggedPattern: u32 = 668; +pub const vpiStructPattern: u32 = 669; +pub const vpiDoWhile: u32 = 670; +pub const vpiOrderedWait: u32 = 671; +pub const vpiWaitFork: u32 = 672; +pub const vpiDisableFork: u32 = 673; +pub const vpiExpectStmt: u32 = 674; +pub const vpiForeachStmt: u32 = 675; +pub const vpiReturnStmt: u32 = 691; +pub const vpiFinal: u32 = 676; +pub const vpiExtends: u32 = 677; +pub const vpiDistribution: u32 = 678; +pub const vpiSeqFormalDecl: u32 = 679; +pub const vpiPropFormalDecl: u32 = 699; +pub const vpiArrayNet: u32 = 114; +pub const vpiEnumNet: u32 = 680; +pub const vpiIntegerNet: u32 = 681; +pub const vpiLogicNet: u32 = 36; +pub const vpiTimeNet: u32 = 682; +pub const vpiUnionNet: u32 = 525; +pub const vpiShortRealNet: u32 = 526; +pub const vpiRealNet: u32 = 527; +pub const vpiByteNet: u32 = 528; +pub const vpiShortIntNet: u32 = 529; +pub const vpiIntNet: u32 = 530; +pub const vpiLongIntNet: u32 = 531; +pub const vpiBitNet: u32 = 532; +pub const vpiInterconnectNet: u32 = 533; +pub const vpiInterconnectArray: u32 = 534; +pub const vpiStructNet: u32 = 683; +pub const vpiBreak: u32 = 684; +pub const vpiContinue: u32 = 685; +pub const vpiPackedArrayNet: u32 = 693; +pub const vpiNettypeDecl: u32 = 523; +pub const vpiConstraintExpr: u32 = 747; +pub const vpiElseConst: u32 = 748; +pub const vpiImplication: u32 = 749; +pub const vpiConstrIf: u32 = 738; +pub const vpiConstrIfElse: u32 
= 739; +pub const vpiConstrForEach: u32 = 736; +pub const vpiSoftDisable: u32 = 733; +pub const vpiLetDecl: u32 = 903; +pub const vpiLetExpr: u32 = 904; +pub const vpiActual: u32 = 700; +pub const vpiTypedefAlias: u32 = 701; +pub const vpiIndexTypespec: u32 = 702; +pub const vpiBaseTypespec: u32 = 703; +pub const vpiElemTypespec: u32 = 704; +pub const vpiNetTypedefAlias: u32 = 705; +pub const vpiInputSkew: u32 = 706; +pub const vpiOutputSkew: u32 = 707; +pub const vpiGlobalClocking: u32 = 708; +pub const vpiDefaultClocking: u32 = 709; +pub const vpiDefaultDisableIff: u32 = 710; +pub const vpiOrigin: u32 = 713; +pub const vpiPrefix: u32 = 714; +pub const vpiWith: u32 = 715; +pub const vpiProperty: u32 = 718; +pub const vpiValueRange: u32 = 720; +pub const vpiPattern: u32 = 721; +pub const vpiWeight: u32 = 722; +pub const vpiConstraintItem: u32 = 746; +pub const vpiTypedef: u32 = 725; +pub const vpiImport: u32 = 726; +pub const vpiDerivedClasses: u32 = 727; +pub const vpiInterfaceDecl: u32 = 728; +pub const vpiMethods: u32 = 730; +pub const vpiSolveBefore: u32 = 731; +pub const vpiSolveAfter: u32 = 732; +pub const vpiWaitingProcesses: u32 = 734; +pub const vpiMessages: u32 = 735; +pub const vpiLoopVars: u32 = 737; +pub const vpiConcurrentAssertion: u32 = 740; +pub const vpiConcurrentAssertions: u32 = 740; +pub const vpiMatchItem: u32 = 741; +pub const vpiMember: u32 = 742; +pub const vpiElement: u32 = 743; +pub const vpiAssertion: u32 = 744; +pub const vpiInstance: u32 = 745; +pub const vpiTop: u32 = 600; +pub const vpiUnit: u32 = 602; +pub const vpiJoinType: u32 = 603; +pub const vpiJoin: u32 = 0; +pub const vpiJoinNone: u32 = 1; +pub const vpiJoinAny: u32 = 2; +pub const vpiAccessType: u32 = 604; +pub const vpiForkJoinAcc: u32 = 1; +pub const vpiExternAcc: u32 = 2; +pub const vpiDPIExportAcc: u32 = 3; +pub const vpiDPIImportAcc: u32 = 4; +pub const vpiArrayType: u32 = 606; +pub const vpiStaticArray: u32 = 1; +pub const vpiDynamicArray: u32 = 2; +pub const vpiAssocArray: u32 = 3; +pub const vpiQueueArray: u32 = 4; +pub const vpiArrayMember: u32 = 607; +pub const vpiIsRandomized: u32 = 608; +pub const vpiLocalVarDecls: u32 = 609; +pub const vpiOpStrong: u32 = 656; +pub const vpiRandType: u32 = 610; +pub const vpiNotRand: u32 = 1; +pub const vpiRand: u32 = 2; +pub const vpiRandC: u32 = 3; +pub const vpiPortType: u32 = 611; +pub const vpiInterfacePort: u32 = 1; +pub const vpiModportPort: u32 = 2; +pub const vpiConstantVariable: u32 = 612; +pub const vpiStructUnionMember: u32 = 615; +pub const vpiVisibility: u32 = 620; +pub const vpiPublicVis: u32 = 1; +pub const vpiProtectedVis: u32 = 2; +pub const vpiLocalVis: u32 = 3; +pub const vpiOneStepConst: u32 = 9; +pub const vpiUnboundedConst: u32 = 10; +pub const vpiNullConst: u32 = 11; +pub const vpiAlwaysType: u32 = 624; +pub const vpiAlwaysComb: u32 = 2; +pub const vpiAlwaysFF: u32 = 3; +pub const vpiAlwaysLatch: u32 = 4; +pub const vpiDistType: u32 = 625; +pub const vpiEqualDist: u32 = 1; +pub const vpiDivDist: u32 = 2; +pub const vpiPacked: u32 = 630; +pub const vpiTagged: u32 = 632; +pub const vpiRef: u32 = 6; +pub const vpiVirtual: u32 = 635; +pub const vpiHasActual: u32 = 636; +pub const vpiIsConstraintEnabled: u32 = 638; +pub const vpiSoft: u32 = 639; +pub const vpiClassType: u32 = 640; +pub const vpiMailboxClass: u32 = 1; +pub const vpiSemaphoreClass: u32 = 2; +pub const vpiUserDefinedClass: u32 = 3; +pub const vpiProcessClass: u32 = 4; +pub const vpiMethod: u32 = 645; +pub const vpiIsClockInferred: u32 = 649; +pub const vpiIsDeferred: u32 
= 657; +pub const vpiIsFinal: u32 = 670; +pub const vpiIsCoverSequence: u32 = 659; +pub const vpiQualifier: u32 = 650; +pub const vpiNoQualifier: u32 = 0; +pub const vpiUniqueQualifier: u32 = 1; +pub const vpiPriorityQualifier: u32 = 2; +pub const vpiTaggedQualifier: u32 = 4; +pub const vpiRandQualifier: u32 = 8; +pub const vpiInsideQualifier: u32 = 16; +pub const vpiInputEdge: u32 = 651; +pub const vpiOutputEdge: u32 = 652; +pub const vpiGeneric: u32 = 653; +pub const vpiCompatibilityMode: u32 = 654; +pub const vpiMode1364v1995: u32 = 1; +pub const vpiMode1364v2001: u32 = 2; +pub const vpiMode1364v2005: u32 = 3; +pub const vpiMode1800v2005: u32 = 4; +pub const vpiMode1800v2009: u32 = 5; +pub const vpiPackedArrayMember: u32 = 655; +pub const vpiStartLine: u32 = 661; +pub const vpiColumn: u32 = 662; +pub const vpiEndLine: u32 = 663; +pub const vpiEndColumn: u32 = 664; +pub const vpiAllocScheme: u32 = 658; +pub const vpiAutomaticScheme: u32 = 1; +pub const vpiDynamicScheme: u32 = 2; +pub const vpiOtherScheme: u32 = 3; +pub const vpiObjId: u32 = 660; +pub const vpiDPIPure: u32 = 665; +pub const vpiDPIContext: u32 = 666; +pub const vpiDPICStr: u32 = 667; +pub const vpiDPI: u32 = 1; +pub const vpiDPIC: u32 = 2; +pub const vpiDPICIdentifier: u32 = 668; +pub const vpiIsModPort: u32 = 669; +pub const vpiImplyOp: u32 = 50; +pub const vpiNonOverlapImplyOp: u32 = 51; +pub const vpiOverlapImplyOp: u32 = 52; +pub const vpiAcceptOnOp: u32 = 83; +pub const vpiRejectOnOp: u32 = 84; +pub const vpiSyncAcceptOnOp: u32 = 85; +pub const vpiSyncRejectOnOp: u32 = 86; +pub const vpiOverlapFollowedByOp: u32 = 87; +pub const vpiNonOverlapFollowedByOp: u32 = 88; +pub const vpiNexttimeOp: u32 = 89; +pub const vpiAlwaysOp: u32 = 90; +pub const vpiEventuallyOp: u32 = 91; +pub const vpiUntilOp: u32 = 92; +pub const vpiUntilWithOp: u32 = 93; +pub const vpiUnaryCycleDelayOp: u32 = 53; +pub const vpiCycleDelayOp: u32 = 54; +pub const vpiIntersectOp: u32 = 55; +pub const vpiFirstMatchOp: u32 = 56; +pub const vpiThroughoutOp: u32 = 57; +pub const vpiWithinOp: u32 = 58; +pub const vpiRepeatOp: u32 = 59; +pub const vpiConsecutiveRepeatOp: u32 = 60; +pub const vpiGotoRepeatOp: u32 = 61; +pub const vpiPostIncOp: u32 = 62; +pub const vpiPreIncOp: u32 = 63; +pub const vpiPostDecOp: u32 = 64; +pub const vpiPreDecOp: u32 = 65; +pub const vpiMatchOp: u32 = 66; +pub const vpiCastOp: u32 = 67; +pub const vpiIffOp: u32 = 68; +pub const vpiWildEqOp: u32 = 69; +pub const vpiWildNeqOp: u32 = 70; +pub const vpiStreamLROp: u32 = 71; +pub const vpiStreamRLOp: u32 = 72; +pub const vpiMatchedOp: u32 = 73; +pub const vpiTriggeredOp: u32 = 74; +pub const vpiAssignmentPatternOp: u32 = 75; +pub const vpiMultiAssignmentPatternOp: u32 = 76; +pub const vpiIfOp: u32 = 77; +pub const vpiIfElseOp: u32 = 78; +pub const vpiCompAndOp: u32 = 79; +pub const vpiCompOrOp: u32 = 80; +pub const vpiImpliesOp: u32 = 94; +pub const vpiInsideOp: u32 = 95; +pub const vpiTypeOp: u32 = 81; +pub const vpiAssignmentOp: u32 = 82; +pub const vpiOtherFunc: u32 = 6; +pub const vpiValidUnknown: u32 = 2; +pub const vpiCoverageStart: u32 = 750; +pub const vpiCoverageStop: u32 = 751; +pub const vpiCoverageReset: u32 = 752; +pub const vpiCoverageCheck: u32 = 753; +pub const vpiCoverageMerge: u32 = 754; +pub const vpiCoverageSave: u32 = 755; +pub const vpiAssertCoverage: u32 = 760; +pub const vpiFsmStateCoverage: u32 = 761; +pub const vpiStatementCoverage: u32 = 762; +pub const vpiToggleCoverage: u32 = 763; +pub const vpiCovered: u32 = 765; +pub const vpiCoverMax: u32 = 766; +pub 
const vpiCoveredMax: u32 = 766; +pub const vpiCoveredCount: u32 = 767; +pub const vpiAssertAttemptCovered: u32 = 770; +pub const vpiAssertSuccessCovered: u32 = 771; +pub const vpiAssertFailureCovered: u32 = 772; +pub const vpiAssertVacuousSuccessCovered: u32 = 773; +pub const vpiAssertDisableCovered: u32 = 774; +pub const vpiAssertKillCovered: u32 = 777; +pub const vpiFsmStates: u32 = 775; +pub const vpiFsmStateExpression: u32 = 776; +pub const vpiFsm: u32 = 758; +pub const vpiFsmHandle: u32 = 759; +pub const vpiAssertionLock: u32 = 645; +pub const vpiAssertionUnlock: u32 = 646; +pub const vpiAssertionDisable: u32 = 620; +pub const vpiAssertionEnable: u32 = 621; +pub const vpiAssertionReset: u32 = 622; +pub const vpiAssertionKill: u32 = 623; +pub const vpiAssertionEnableStep: u32 = 624; +pub const vpiAssertionDisableStep: u32 = 625; +pub const vpiAssertionClockSteps: u32 = 626; +pub const vpiAssertionSysLock: u32 = 647; +pub const vpiAssertionSysUnlock: u32 = 648; +pub const vpiAssertionSysOn: u32 = 627; +pub const vpiAssertionSysOff: u32 = 628; +pub const vpiAssertionSysKill: u32 = 632; +pub const vpiAssertionSysEnd: u32 = 629; +pub const vpiAssertionSysReset: u32 = 630; +pub const vpiAssertionDisablePassAction: u32 = 633; +pub const vpiAssertionEnablePassAction: u32 = 634; +pub const vpiAssertionDisableFailAction: u32 = 635; +pub const vpiAssertionEnableFailAction: u32 = 636; +pub const vpiAssertionDisableVacuousAction: u32 = 637; +pub const vpiAssertionEnableNonvacuousAction: u32 = 638; +pub const vpiAssertionSysEnablePassAction: u32 = 639; +pub const vpiAssertionSysEnableFailAction: u32 = 640; +pub const vpiAssertionSysDisablePassAction: u32 = 641; +pub const vpiAssertionSysDisableFailAction: u32 = 642; +pub const vpiAssertionSysEnableNonvacuousAction: u32 = 643; +pub const vpiAssertionSysDisableVacuousAction: u32 = 644; +pub type va_list = __builtin_va_list; +pub type PLI_INT64 = i64; +pub type PLI_INT32 = ::std::os::raw::c_int; +pub type PLI_UINT32 = ::std::os::raw::c_uint; +pub type PLI_INT16 = ::std::os::raw::c_short; +pub type PLI_BYTE8 = ::std::os::raw::c_char; +#[doc = " TYPEDEFS"] +pub type vpiHandle = *mut PLI_UINT32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_time { + pub type_: PLI_INT32, + pub high: PLI_UINT32, + pub low: PLI_UINT32, + pub real: f64, +} +#[test] +fn bindgen_test_layout_t_vpi_time() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_time> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_time>(), + 24usize, + concat!("Size of: ", stringify!(t_vpi_time)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_time>(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_time)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).high) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(high) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).low) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(low) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).real) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(real) + ) + ); +} +pub type s_vpi_time = t_vpi_time; +pub type p_vpi_time = *mut t_vpi_time; +#[doc = " 
delay structures"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_delay { + pub da: *mut t_vpi_time, + pub no_of_delays: PLI_INT32, + pub time_type: PLI_INT32, + pub mtm_flag: PLI_INT32, + pub append_flag: PLI_INT32, + pub pulsere_flag: PLI_INT32, +} +#[test] +fn bindgen_test_layout_t_vpi_delay() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_delay> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_delay>(), + 32usize, + concat!("Size of: ", stringify!(t_vpi_delay)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_delay>(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_delay)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).da) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(da) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).no_of_delays) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(no_of_delays) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).time_type) as usize - ptr as usize }, + 12usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(time_type) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).mtm_flag) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(mtm_flag) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).append_flag) as usize - ptr as usize }, + 20usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(append_flag) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).pulsere_flag) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(pulsere_flag) + ) + ); +} +#[doc = " delay structures"] +pub type p_vpi_delay = *mut t_vpi_delay; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_vecval { + pub aval: PLI_UINT32, + pub bval: PLI_UINT32, +} +#[test] +fn bindgen_test_layout_t_vpi_vecval() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_vecval> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_vecval>(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_vecval>(), + 4usize, + concat!("Alignment of ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).aval) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(aval) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).bval) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(bval) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_strengthval { + pub logic: PLI_INT32, + pub s0: PLI_INT32, + pub s1: PLI_INT32, +} +#[test] +fn bindgen_test_layout_t_vpi_strengthval() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_strengthval> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_strengthval>(), + 12usize, + concat!("Size of: ", stringify!(t_vpi_strengthval)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_strengthval>(), + 4usize, + concat!("Alignment of ", stringify!(t_vpi_strengthval)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).logic) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_strengthval), + "::", + stringify!(logic) + ) + ); + 
assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).s0) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_strengthval), + "::", + stringify!(s0) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).s1) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_strengthval), + "::", + stringify!(s1) + ) + ); +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct t_vpi_value { + pub format: PLI_INT32, + pub value: t_vpi_value__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union t_vpi_value__bindgen_ty_1 { + pub str_: *mut PLI_BYTE8, + pub scalar: PLI_INT32, + pub integer: PLI_INT32, + pub real: f64, + pub time: *mut t_vpi_time, + pub vector: *mut t_vpi_vecval, + pub strength: *mut t_vpi_strengthval, + pub misc: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_vpi_value__bindgen_ty_1() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_value__bindgen_ty_1> = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_value__bindgen_ty_1>(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_value__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_value__bindgen_ty_1>(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_value__bindgen_ty_1)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).str_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(str_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).scalar) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(scalar) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).integer) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(integer) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).real) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(real) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).time) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).vector) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(vector) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).strength) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(strength) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).misc) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(misc) + ) + ); +} +#[test] +fn bindgen_test_layout_t_vpi_value() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_value> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_value>(), + 16usize, + concat!("Size of: ", stringify!(t_vpi_value)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_value>(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_value)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).format) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value), + "::", + stringify!(format) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).value) as usize - 
ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value), + "::", + stringify!(value) + ) + ); +} +pub type p_vpi_value = *mut t_vpi_value; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct t_vpi_arrayvalue { + pub format: PLI_UINT32, + pub flags: PLI_UINT32, + pub value: t_vpi_arrayvalue__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union t_vpi_arrayvalue__bindgen_ty_1 { + pub integers: *mut PLI_INT32, + pub shortints: *mut PLI_INT16, + pub longints: *mut PLI_INT64, + pub rawvals: *mut PLI_BYTE8, + pub vectors: *mut t_vpi_vecval, + pub times: *mut t_vpi_time, + pub reals: *mut f64, + pub shortreals: *mut f32, +} +#[test] +fn bindgen_test_layout_t_vpi_arrayvalue__bindgen_ty_1() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_arrayvalue__bindgen_ty_1> = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_arrayvalue__bindgen_ty_1>(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_arrayvalue__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_arrayvalue__bindgen_ty_1>(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_arrayvalue__bindgen_ty_1)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).integers) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(integers) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).shortints) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(shortints) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).longints) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(longints) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).rawvals) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(rawvals) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).vectors) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(vectors) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).times) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(times) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).reals) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(reals) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).shortreals) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(shortreals) + ) + ); +} +#[test] +fn bindgen_test_layout_t_vpi_arrayvalue() { + const UNINIT: ::std::mem::MaybeUninit<t_vpi_arrayvalue> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<t_vpi_arrayvalue>(), + 16usize, + concat!("Size of: ", stringify!(t_vpi_arrayvalue)) + ); + assert_eq!( + ::std::mem::align_of::<t_vpi_arrayvalue>(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_arrayvalue)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).format) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue), + "::", + stringify!(format) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: 
", + stringify!(t_vpi_arrayvalue), + "::", + stringify!(flags) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).value) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue), + "::", + stringify!(value) + ) + ); +} +pub type p_vpi_arrayvalue = *mut t_vpi_arrayvalue; +#[doc = " system task/function structure"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_systf_data { + pub type_: PLI_INT32, + pub sysfunctype: PLI_INT32, + pub tfname: *mut PLI_BYTE8, + pub calltf: ::std::option::Option PLI_INT32>, + pub compiletf: ::std::option::Option PLI_INT32>, + pub sizetf: ::std::option::Option PLI_INT32>, + pub user_data: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_vpi_systf_data() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 48usize, + concat!("Size of: ", stringify!(t_vpi_systf_data)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_systf_data)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).sysfunctype) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(sysfunctype) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).tfname) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(tfname) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).calltf) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(calltf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).compiletf) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(compiletf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).sizetf) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(sizetf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).user_data) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(user_data) + ) + ); +} +#[doc = " system task/function structure"] +pub type p_vpi_systf_data = *mut t_vpi_systf_data; +#[doc = " SystemVerilog execution information structure"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_vlog_info { + pub argc: PLI_INT32, + pub argv: *mut *mut PLI_BYTE8, + pub product: *mut PLI_BYTE8, + pub version: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_vpi_vlog_info() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 32usize, + concat!("Size of: ", stringify!(t_vpi_vlog_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_vlog_info)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).argc) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(argc) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).argv) as usize - ptr as usize }, + 
8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(argv) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).product) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(product) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(version) + ) + ); +} +#[doc = " SystemVerilog execution information structure"] +pub type p_vpi_vlog_info = *mut t_vpi_vlog_info; +#[doc = " PLI error information structure"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_error_info { + pub state: PLI_INT32, + pub level: PLI_INT32, + pub message: *mut PLI_BYTE8, + pub product: *mut PLI_BYTE8, + pub code: *mut PLI_BYTE8, + pub file: *mut PLI_BYTE8, + pub line: PLI_INT32, +} +#[test] +fn bindgen_test_layout_t_vpi_error_info() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 48usize, + concat!("Size of: ", stringify!(t_vpi_error_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_error_info)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).state) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(state) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).level) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(level) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).message) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(message) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).product) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(product) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).code) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(code) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).file) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(file) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).line) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(line) + ) + ); +} +#[doc = " PLI error information structure"] +pub type p_vpi_error_info = *mut t_vpi_error_info; +#[doc = " callback structures"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_cb_data { + pub reason: PLI_INT32, + pub cb_rtn: ::std::option::Option PLI_INT32>, + pub obj: vpiHandle, + pub time: p_vpi_time, + pub value: p_vpi_value, + pub index: PLI_INT32, + pub user_data: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_cb_data() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 56usize, + concat!("Size of: ", stringify!(t_cb_data)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_cb_data)) + ); + assert_eq!( + unsafe { 
::std::ptr::addr_of!((*ptr).reason) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(reason) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).cb_rtn) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(cb_rtn) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).obj) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(obj) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).time) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).value) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(value) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).index) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(index) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).user_data) as usize - ptr as usize }, + 48usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(user_data) + ) + ); +} +#[doc = " callback structures"] +pub type p_cb_data = *mut t_cb_data; +extern "C" { + pub fn vpi_register_cb(cb_data_p: p_cb_data) -> vpiHandle; +} +extern "C" { + pub fn vpi_remove_cb(cb_obj: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_cb_info(object: vpiHandle, cb_data_p: p_cb_data); +} +extern "C" { + pub fn vpi_register_systf(systf_data_p: p_vpi_systf_data) -> vpiHandle; +} +extern "C" { + pub fn vpi_get_systf_info(object: vpiHandle, systf_data_p: p_vpi_systf_data); +} +extern "C" { + pub fn vpi_handle_by_name(name: *mut PLI_BYTE8, scope: vpiHandle) -> vpiHandle; +} +extern "C" { + pub fn vpi_handle_by_index(object: vpiHandle, indx: PLI_INT32) -> vpiHandle; +} +extern "C" { + pub fn vpi_handle(type_: PLI_INT32, refHandle: vpiHandle) -> vpiHandle; +} +extern "C" { + pub fn vpi_handle_multi( + type_: PLI_INT32, + refHandle1: vpiHandle, + refHandle2: vpiHandle, + ... 
+ ) -> vpiHandle; +} +extern "C" { + pub fn vpi_iterate(type_: PLI_INT32, refHandle: vpiHandle) -> vpiHandle; +} +extern "C" { + pub fn vpi_scan(iterator: vpiHandle) -> vpiHandle; +} +extern "C" { + pub fn vpi_get(property: PLI_INT32, object: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get64(property: PLI_INT32, object: vpiHandle) -> PLI_INT64; +} +extern "C" { + pub fn vpi_get_str(property: PLI_INT32, object: vpiHandle) -> *mut PLI_BYTE8; +} +extern "C" { + pub fn vpi_get_delays(object: vpiHandle, delay_p: p_vpi_delay); +} +extern "C" { + pub fn vpi_put_delays(object: vpiHandle, delay_p: p_vpi_delay); +} +extern "C" { + pub fn vpi_get_value(expr: vpiHandle, value_p: p_vpi_value); +} +extern "C" { + pub fn vpi_put_value( + object: vpiHandle, + value_p: p_vpi_value, + time_p: p_vpi_time, + flags: PLI_INT32, + ) -> vpiHandle; +} +extern "C" { + pub fn vpi_get_value_array( + object: vpiHandle, + arrayvalue_p: p_vpi_arrayvalue, + index_p: *mut PLI_INT32, + num: PLI_UINT32, + ); +} +extern "C" { + pub fn vpi_put_value_array( + object: vpiHandle, + arrayvalue_p: p_vpi_arrayvalue, + index_p: *mut PLI_INT32, + num: PLI_UINT32, + ); +} +extern "C" { + pub fn vpi_get_time(object: vpiHandle, time_p: p_vpi_time); +} +extern "C" { + pub fn vpi_mcd_open(fileName: *mut PLI_BYTE8) -> PLI_UINT32; +} +extern "C" { + pub fn vpi_mcd_close(mcd: PLI_UINT32) -> PLI_UINT32; +} +extern "C" { + pub fn vpi_mcd_name(cd: PLI_UINT32) -> *mut PLI_BYTE8; +} +extern "C" { + pub fn vpi_mcd_printf(mcd: PLI_UINT32, format: *mut PLI_BYTE8, ...) -> PLI_INT32; +} +extern "C" { + pub fn vpi_printf(format: *mut PLI_BYTE8, ...) -> PLI_INT32; +} +extern "C" { + pub fn vpi_compare_objects(object1: vpiHandle, object2: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_chk_error(error_info_p: p_vpi_error_info) -> PLI_INT32; +} +extern "C" { + pub fn vpi_free_object(object: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_release_handle(object: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_vlog_info(vlog_info_p: p_vpi_vlog_info) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_data(id: PLI_INT32, dataLoc: *mut PLI_BYTE8, numOfBytes: PLI_INT32) + -> PLI_INT32; +} +extern "C" { + pub fn vpi_put_data(id: PLI_INT32, dataLoc: *mut PLI_BYTE8, numOfBytes: PLI_INT32) + -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_userdata(obj: vpiHandle) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn vpi_put_userdata(obj: vpiHandle, userdata: *mut ::std::os::raw::c_void) -> PLI_INT32; +} +extern "C" { + pub fn vpi_vprintf(format: *mut PLI_BYTE8, ap: *mut __va_list_tag) -> PLI_INT32; +} +extern "C" { + pub fn vpi_mcd_vprintf( + mcd: PLI_UINT32, + format: *mut PLI_BYTE8, + ap: *mut __va_list_tag, + ) -> PLI_INT32; +} +extern "C" { + pub fn vpi_flush() -> PLI_INT32; +} +extern "C" { + pub fn vpi_mcd_flush(mcd: PLI_UINT32) -> PLI_INT32; +} +extern "C" { + pub fn vpi_control(operation: PLI_INT32, ...) 
-> PLI_INT32;
+}
+extern "C" {
+    pub fn vpi_handle_by_multi_index(
+        obj: vpiHandle,
+        num_index: PLI_INT32,
+        index_array: *mut PLI_INT32,
+    ) -> vpiHandle;
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct t_vpi_assertion_step_info {
+    pub matched_expression_count: PLI_INT32,
+    pub matched_exprs: *mut vpiHandle,
+    pub stateFrom: PLI_INT32,
+    pub stateTo: PLI_INT32,
+}
+#[test]
+fn bindgen_test_layout_t_vpi_assertion_step_info() {
+    const UNINIT: ::std::mem::MaybeUninit<t_vpi_assertion_step_info> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<t_vpi_assertion_step_info>(),
+        24usize,
+        concat!("Size of: ", stringify!(t_vpi_assertion_step_info))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<t_vpi_assertion_step_info>(),
+        8usize,
+        concat!("Alignment of ", stringify!(t_vpi_assertion_step_info))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).matched_expression_count) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(t_vpi_assertion_step_info),
+            "::",
+            stringify!(matched_expression_count)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).matched_exprs) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(t_vpi_assertion_step_info),
+            "::",
+            stringify!(matched_exprs)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).stateFrom) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(t_vpi_assertion_step_info),
+            "::",
+            stringify!(stateFrom)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).stateTo) as usize - ptr as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(t_vpi_assertion_step_info),
+            "::",
+            stringify!(stateTo)
+        )
+    );
+}
+pub type p_vpi_assertion_step_info = *mut t_vpi_assertion_step_info;
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct t_vpi_attempt_info {
+    pub detail: t_vpi_attempt_info__bindgen_ty_1,
+    pub attemptStartTime: s_vpi_time,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union t_vpi_attempt_info__bindgen_ty_1 {
+    pub failExpr: vpiHandle,
+    pub step: p_vpi_assertion_step_info,
+}
+#[test]
+fn bindgen_test_layout_t_vpi_attempt_info__bindgen_ty_1() {
+    const UNINIT: ::std::mem::MaybeUninit<t_vpi_attempt_info__bindgen_ty_1> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<t_vpi_attempt_info__bindgen_ty_1>(),
+        8usize,
+        concat!("Size of: ", stringify!(t_vpi_attempt_info__bindgen_ty_1))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<t_vpi_attempt_info__bindgen_ty_1>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(t_vpi_attempt_info__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).failExpr) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(t_vpi_attempt_info__bindgen_ty_1),
+            "::",
+            stringify!(failExpr)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).step) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(t_vpi_attempt_info__bindgen_ty_1),
+            "::",
+            stringify!(step)
+        )
+    );
+}
+#[test]
+fn bindgen_test_layout_t_vpi_attempt_info() {
+    const UNINIT: ::std::mem::MaybeUninit<t_vpi_attempt_info> = ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<t_vpi_attempt_info>(),
+        32usize,
+        concat!("Size of: ", stringify!(t_vpi_attempt_info))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<t_vpi_attempt_info>(),
+        8usize,
+        concat!("Alignment of ", stringify!(t_vpi_attempt_info))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).detail) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(t_vpi_attempt_info),
+            "::",
+            stringify!(detail)
+        )
+    );
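+    // These generated layout tests pin the C ABI of the VPI structures: if the
+    // simulator's headers ever change a size or a field offset, the mismatch is
+    // caught at `cargo test` time instead of silently corrupting memory across
+    // the FFI boundary.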
assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).attemptStartTime) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_attempt_info), + "::", + stringify!(attemptStartTime) + ) + ); +} +pub type p_vpi_attempt_info = *mut t_vpi_attempt_info; +pub type vpi_assertion_callback_func = ::std::option::Option< + unsafe extern "C" fn( + reason: PLI_INT32, + cb_time: p_vpi_time, + assertion: vpiHandle, + info: p_vpi_attempt_info, + user_data: *mut PLI_BYTE8, + ) -> PLI_INT32, +>; +extern "C" { + pub fn vpi_register_assertion_cb( + assertion: vpiHandle, + reason: PLI_INT32, + cb_rtn: vpi_assertion_callback_func, + user_data: *mut PLI_BYTE8, + ) -> vpiHandle; +} +pub type __builtin_va_list = [__va_list_tag; 1usize]; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct __va_list_tag { + pub gp_offset: ::std::os::raw::c_uint, + pub fp_offset: ::std::os::raw::c_uint, + pub overflow_arg_area: *mut ::std::os::raw::c_void, + pub reg_save_area: *mut ::std::os::raw::c_void, +} +#[test] +fn bindgen_test_layout___va_list_tag() { + const UNINIT: ::std::mem::MaybeUninit<__va_list_tag> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<__va_list_tag>(), + 24usize, + concat!("Size of: ", stringify!(__va_list_tag)) + ); + assert_eq!( + ::std::mem::align_of::<__va_list_tag>(), + 8usize, + concat!("Alignment of ", stringify!(__va_list_tag)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).gp_offset) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(gp_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).fp_offset) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(fp_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).overflow_arg_area) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(overflow_arg_area) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).reg_save_area) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(reg_save_area) + ) + ); +} diff --git a/t1rocketemu/online_drive/Cargo.toml b/t1rocketemu/online_drive/Cargo.toml new file mode 100644 index 000000000..929a8b546 --- /dev/null +++ b/t1rocketemu/online_drive/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "online_drive" +version = "0.1.0" +edition = "2021" + +[dependencies] +online_dpi = { path = "../online_dpi", features = ["sv2023"] } + +[build-dependencies] +cmake = "0.1.50" + +[features] +trace = ["online_dpi/trace"] diff --git a/t1rocketemu/online_drive/build.rs b/t1rocketemu/online_drive/build.rs new file mode 100644 index 000000000..fe883aaa8 --- /dev/null +++ b/t1rocketemu/online_drive/build.rs @@ -0,0 +1,21 @@ +use cmake::Config; + +fn main() { + #[cfg(feature = "trace")] + let dst = + Config::new("verilator_shim").define("VM_TRACE", "1").very_verbose(true).always_configure(true).build(); + #[cfg(not(feature = "trace"))] + let dst = Config::new("verilator_shim").very_verbose(true).always_configure(true).build(); + + println!("cargo::rustc-link-search=native={}/lib", dst.display()); + + // link order matters! 
so we use +whole-archive here
+  // verilator_main <- VTestBench <- verilated <- verilator_shim <- stdc++
+  // verilated <- libz
+  println!("cargo::rustc-link-lib=static:+whole-archive=verilator_shim");
+  println!("cargo::rustc-link-lib=static:+whole-archive=VTestBench");
+  println!("cargo::rustc-link-lib=static:+whole-archive=verilated");
+  println!("cargo::rustc-link-lib=stdc++");
+  println!("cargo::rustc-link-lib=z");
+  println!("cargo::rerun-if-env-changed=VERILATED_LIB_DIR");
+}
diff --git a/t1rocketemu/online_drive/src/main.rs b/t1rocketemu/online_drive/src/main.rs
new file mode 100644
index 000000000..3a6a2aa13
--- /dev/null
+++ b/t1rocketemu/online_drive/src/main.rs
@@ -0,0 +1,31 @@
+// force link with online_dpi
+extern crate online_dpi;
+
+use std::{
+  ffi::{c_char, c_int, CString},
+  ptr,
+};
+
+fn main() {
+  let c_args: Vec<CString> = std::env::args().map(|arg| CString::new(arg).unwrap()).collect();
+
+  let mut c_args_ptr: Vec<*const c_char> = c_args.iter().map(|arg| arg.as_ptr()).collect();
+  c_args_ptr.push(ptr::null());
+
+  let argc = c_args.len() as c_int;
+  let argv = c_args_ptr.as_ptr() as *mut *mut c_char;
+
+  unsafe {
+    verilator_main_c(argc, argv);
+  }
+
+  std::fs::write(
+    "perf.txt",
+    format!("total_cycles: {}", online_dpi::get_t()),
+  )
+  .expect("fail to write into perf.txt");
+}
+
+extern "C" {
+  fn verilator_main_c(argc: c_int, argv: *mut *mut c_char) -> c_int;
+}
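A note on the `argv` marshalling in `main.rs` above: `c_args` owns the `CString` buffers and `c_args_ptr` only borrows raw pointers into them, so both must stay alive across the `verilator_main_c` call. A minimal, runnable sketch of the same pattern (the `to_argv` helper is illustrative, not part of the diff):

```rust
use std::ffi::{c_char, c_int, CString};

// Build a NULL-terminated argv from owned CStrings; the returned pointers are
// only valid while `owned` is alive.
fn to_argv(owned: &[CString]) -> Vec<*const c_char> {
    let mut argv: Vec<*const c_char> = owned.iter().map(|s| s.as_ptr()).collect();
    argv.push(std::ptr::null()); // C convention: argv[argc] == NULL
    argv
}

fn main() {
    let owned: Vec<CString> =
        std::env::args().map(|a| CString::new(a).unwrap()).collect();
    let argv = to_argv(&owned);
    let argc = owned.len() as c_int;
    // `owned` must outlive every use of argv's raw pointers.
    assert_eq!(argv.len(), argc as usize + 1);
}
```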
diff --git a/t1rocketemu/online_drive/verilator_shim/CMakeLists.txt b/t1rocketemu/online_drive/verilator_shim/CMakeLists.txt
new file mode 100644
index 000000000..e7aefb74f
--- /dev/null
+++ b/t1rocketemu/online_drive/verilator_shim/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required(VERSION 3.20)
+project(verilator_shim)
+set(CMAKE_CXX_STANDARD 17)
+
+message(STATUS "Project '${PROJECT_NAME}' build type: ${CMAKE_BUILD_TYPE}")
+
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
+add_library(verilator_shim
+  STATIC
+  verilator_shim.cc
+)
+
+if (NOT DEFINED VERILATED_LIB_DIR)
+  set(VERILATED_LIB_DIR "$ENV{VERILATED_LIB_DIR}")
+  if (VERILATED_LIB_DIR STREQUAL "")
+    message(FATAL_ERROR "Specify the verilated library directory via -DVERILATED_LIB_DIR or the VERILATED_LIB_DIR environment variable; neither is set")
+  endif()
+endif()
+
+if (NOT DEFINED VERILATED_INC_DIR)
+  set(VERILATED_INC_DIR "$ENV{VERILATED_INC_DIR}")
+  if (VERILATED_INC_DIR STREQUAL "")
+    message(FATAL_ERROR "Specify the verilated include directory via -DVERILATED_INC_DIR or the VERILATED_INC_DIR environment variable; neither is set")
+  endif()
+endif()
+
+# include verilator headers
+find_package(verilator REQUIRED)
+message(STATUS "Found verilator: ${verilator_DIR}")
+target_include_directories(verilator_shim PUBLIC ${verilator_DIR}/include)
+target_include_directories(verilator_shim PUBLIC ${verilator_DIR}/include/vltstd)
+
+if(DEFINED VM_TRACE)
+  target_compile_definitions(verilator_shim PRIVATE VM_TRACE=1)
+endif()
+
+install(TARGETS verilator_shim ARCHIVE)
diff --git a/t1rocketemu/online_drive/verilator_shim/verilator_shim.cc b/t1rocketemu/online_drive/verilator_shim/verilator_shim.cc
new file mode 100644
index 000000000..1c0a479d7
--- /dev/null
+++ b/t1rocketemu/online_drive/verilator_shim/verilator_shim.cc
@@ -0,0 +1,40 @@
+#include "VTestBench.h" // Verilated model header (name taken from the VTestBench usage below)
+#include <verilated.h>  // Verilator runtime
+
+class VTestBench;
+
+extern "C" int verilator_main_c(int argc, char **argv) {
+  // Setup context, defaults, and parse command line
+  Verilated::debug(0);
+  VerilatedContext* contextp = new VerilatedContext();
+  contextp->fatalOnError(false);
+  contextp->commandArgs(argc, argv);
+#ifdef VM_TRACE
+  contextp->traceEverOn(true);
+#endif
+
+  // Construct the Verilated model, from VTestBench.h generated by Verilator
+  VTestBench* topp = new VTestBench(contextp);
+
+  // Simulate until $finish
+  while (!contextp->gotFinish()) {
+    // Evaluate model
+    topp->eval();
+    // Advance time
+    if (!topp->eventsPending())
+      break;
+    contextp->time(topp->nextTimeSlot());
+  }
+
+  if (!contextp->gotFinish()) {
+    VL_DEBUG_IF(VL_PRINTF("+ Exiting without $finish; no events left\n"););
+  }
+
+  // Final model cleanup
+  topp->final();
+
+  delete topp;
+  delete contextp;
+
+  return 0;
+}
diff --git a/t1rocketemu/online_vcs/Cargo.toml b/t1rocketemu/online_vcs/Cargo.toml
new file mode 100644
index 000000000..d85b2b690
--- /dev/null
+++ b/t1rocketemu/online_vcs/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "online_vcs"
+edition = "2021"
+version.workspace = true
+
+[lib]
+crate-type = ["staticlib"]
+name = "dpi"
+
+[dependencies]
+online_dpi = { path = "../online_dpi", features = ["svvpi"] }
+
+[features]
+trace = ["online_dpi/trace"]
diff --git a/t1rocketemu/online_vcs/default.nix b/t1rocketemu/online_vcs/default.nix
new file mode 100644
index 000000000..b1400fe1f
--- /dev/null
+++ b/t1rocketemu/online_vcs/default.nix
@@ -0,0 +1,48 @@
+{ lib
+, elaborateConfig
+, rustPlatform
+, libspike
+, libspike_interfaces
+, enable-trace ? false
+, vcStaticHome
+}:
+
+rustPlatform.buildRustPackage {
+  name = "vcs-dpi-lib";
+  src = with lib.fileset; toSource {
+    root = ../.;
+    fileset = unions [
+      ../spike_rs
+      ../offline
+      ../online_dpi
+      ../online_drive
+      ../online_vcs
+      ../test_common
+      ../Cargo.lock
+      ../Cargo.toml
+    ];
+  };
+
+  buildFeatures = lib.optionals enable-trace [ "trace" ];
+  buildAndTestSubdir = "./online_vcs";
+
+  env = {
+    VCS_LIB_DIR = "${vcStaticHome}/vcs-mx/linux64/lib";
+    SPIKE_LIB_DIR = "${libspike}/lib";
+    SPIKE_INTERFACES_LIB_DIR = "${libspike_interfaces}/lib";
+    DESIGN_VLEN = elaborateConfig.parameter.vLen;
+    DESIGN_DLEN = elaborateConfig.parameter.dLen;
+    SPIKE_ISA_STRING =
+      "rv32gc" +
+      (builtins.concatStringsSep "_" elaborateConfig.parameter.extensions) +
+      "_Zvl${toString elaborateConfig.parameter.vLen}b";
+  };
+
+  cargoLock = {
+    lockFile = ../Cargo.lock;
+  };
+
+  passthru = {
+    inherit enable-trace;
+  };
+}
diff --git a/t1rocketemu/online_vcs/src/lib.rs b/t1rocketemu/online_vcs/src/lib.rs
new file mode 100644
index 000000000..be27f2116
--- /dev/null
+++ b/t1rocketemu/online_vcs/src/lib.rs
@@ -0,0 +1,2 @@
+// force link with online_dpi
+extern crate online_dpi;
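Both `online_drive` and `online_vcs` contain a bare `extern crate online_dpi;`: without it, nothing in the crate would reference `online_dpi`, and its `#[no_mangle]` DPI exports could be dropped from the final artifact. A runnable sketch of what such an export looks like (the real `get_t` lives in `online_dpi` and, per `main.rs` above, reports total cycles; the body here is a stand-in):

```rust
// A DPI-style export: #[no_mangle] + extern "C" keeps the symbol name intact
// so the SystemVerilog simulator linking this staticlib can resolve it.
#[no_mangle]
pub extern "C" fn get_t() -> u64 {
    42 // stand-in; the real implementation returns the simulation cycle count
}

fn main() {
    // Callable from Rust too; the simulator resolves it purely by symbol name.
    assert_eq!(get_t(), 42);
}
```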
diff --git a/t1rocketemu/readme.md b/t1rocketemu/readme.md
new file mode 100644
index 000000000..dfd1c0380
--- /dev/null
+++ b/t1rocketemu/readme.md
@@ -0,0 +1,11 @@
+## Build
+
+```bash
+nix build ".#t1.<config>.ip.difftest"
+```
+
+## Develop
+
+```bash
+nix develop ".#t1.<config>.ip.difftest.devShell"
+```
diff --git a/t1rocketemu/spike_interfaces/CMakeLists.txt b/t1rocketemu/spike_interfaces/CMakeLists.txt
new file mode 100644
index 000000000..fe5272891
--- /dev/null
+++ b/t1rocketemu/spike_interfaces/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 3.20)
+project(spike_interfaces LANGUAGES CXX)
+set(CMAKE_CXX_STANDARD 17)
+
+find_package(libspike REQUIRED)
+
+add_library(${CMAKE_PROJECT_NAME} STATIC spike_interfaces.cc)
+
+target_link_libraries(${CMAKE_PROJECT_NAME} PUBLIC libspike)
+
+target_include_directories(${CMAKE_PROJECT_NAME} INTERFACE
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+)
+
+# just playing with CMake export, maybe not necessary
+target_sources(${CMAKE_PROJECT_NAME} PUBLIC
+  FILE_SET HEADERS
+  FILES spike_interfaces.h spike_interfaces_c.h)
+
+install(
+  TARGETS ${CMAKE_PROJECT_NAME}
+  EXPORT ${CMAKE_PROJECT_NAME}-config
+  PUBLIC_HEADER
+  FILE_SET HEADERS
+)
+
+install(
+  EXPORT ${CMAKE_PROJECT_NAME}-config
+  NAMESPACE ${CMAKE_PROJECT_NAME}::
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME}
+)
diff --git a/t1rocketemu/spike_interfaces/default.nix b/t1rocketemu/spike_interfaces/default.nix
new file mode 100644
index 000000000..d95618793
--- /dev/null
+++ b/t1rocketemu/spike_interfaces/default.nix
@@ -0,0 +1,11 @@
+{ lib, stdenv, cmake, libspike }:
+
+stdenv.mkDerivation {
+  name = "spike_interfaces";
+  src = with lib.fileset; toSource {
+    root = ./.;
+    fileset = fileFilter (file: file.name != "default.nix") ./.;
+  };
+  nativeBuildInputs = [ cmake ];
+  propagatedBuildInputs = [ libspike ];
+}
diff --git a/t1rocketemu/spike_interfaces/spike_interfaces-config.cmake b/t1rocketemu/spike_interfaces/spike_interfaces-config.cmake
new file mode 100644
index 000000000..8a0867895
--- /dev/null
+++ b/t1rocketemu/spike_interfaces/spike_interfaces-config.cmake
@@ -0,0 +1,3 @@
+include(CMakeFindDependencyMacro)
+find_dependency(libspike 0.1.0)
+include(${CMAKE_CURRENT_LIST_DIR}/libspike_interface_targets.cmake)
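The `spike_interfaces` library below routes every Spike guest-memory access through one registered callback, so the host decides how guest addresses map to storage. The callback must return a stable host pointer for any address Spike may touch; a bounds-checked variant of the `default_addr_to_mem` that spike_rs registers later could look like this (a sketch; the function name is illustrative):

```rust
// Sketch: guest-address -> host-pointer callback with an explicit bounds check.
// `target` is the opaque pointer registered via spike_register_callback.
extern "C" fn checked_addr_to_mem(target: *mut (), addr: u64) -> *mut u8 {
    let mem = unsafe { &mut *(target as *mut Vec<u8>) };
    let addr = addr as usize;
    assert!(addr < mem.len(), "guest address {addr:#x} outside emulated memory");
    unsafe { mem.as_mut_ptr().add(addr) }
}

fn main() {
    let mut mem = vec![0u8; 1 << 20];
    let target = &mut mem as *mut Vec<u8> as *mut ();
    let p = checked_addr_to_mem(target, 0x100);
    unsafe { *p = 0xab };
    assert_eq!(mem[0x100], 0xab);
}
```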
diff --git a/t1rocketemu/spike_interfaces/spike_interfaces.cc b/t1rocketemu/spike_interfaces/spike_interfaces.cc
new file mode 100644
index 000000000..479ebe11b
--- /dev/null
+++ b/t1rocketemu/spike_interfaces/spike_interfaces.cc
@@ -0,0 +1,252 @@
+#include <iomanip>  // std::setw/std::setfill/std::hex for the trap log below
+#include <iostream>
+
+#include "spike_interfaces.h"
+
+constexpr uint32_t CSR_MSIMEND = 0x7cc;
+
+void *ffi_target;
+
+cfg_t make_spike_cfg() {
+  cfg_t cfg;
+  cfg.initrd_bounds = std::make_pair((reg_t)0, (reg_t)0);
+  cfg.bootargs = nullptr;
+  cfg.isa = DEFAULT_ISA;
+  cfg.priv = DEFAULT_PRIV;
+  cfg.misaligned = false;
+  cfg.endianness = endianness_little;
+  cfg.pmpregions = 16;
+  cfg.pmpgranularity = 4;
+  cfg.mem_layout = std::vector<mem_cfg_t>();
+  cfg.hartids = std::vector<size_t>();
+  cfg.explicit_hartids = false;
+  cfg.real_time_clint = false;
+  cfg.trigger_count = 4;
+  return cfg;
+}
+
+// Initializers are listed in declaration order (cfg, sim, isa, proc), which is
+// also the order the compiler actually runs them in.
+Spike::Spike(const char *set, const char *lvl,
+             size_t lane_number)
+    : cfg(make_spike_cfg()), sim(), isa(set, lvl),
+      proc(
+          /*isa*/ &isa,
+          /*cfg*/ &cfg,
+          /*sim*/ &sim,
+          /*id*/ 0,
+          /*halt on reset*/ true,
+          /*log_file_t*/ nullptr,
+          /*sout*/ std::cerr) {
+  proc.VU.lane_num = lane_number;
+  proc.VU.lane_granularity = 32;
+
+  auto &csrmap = proc.get_state()->csrmap;
+  csrmap[CSR_MSIMEND] = std::make_shared<basic_csr_t>(&proc, CSR_MSIMEND, 1);
+  proc.enable_log_commits();
+}
+
+spike_t *spike_new(const char *set, const char *lvl,
+                   size_t lane_number) {
+  return new spike_t{new Spike(set, lvl, lane_number)};
+}
+
+const char *proc_disassemble(spike_processor_t *proc) {
+  auto pc = proc->p->get_state()->pc;
+  auto mmu = proc->p->get_mmu();
+  auto disasm = proc->p->get_disassembler();
+  auto fetch = mmu->load_insn(pc);
+  return strdup(disasm->disassemble(fetch.insn).c_str());
+}
+
+spike_processor_t *spike_get_proc(spike_t *spike) {
+  return new spike_processor_t{spike->s->get_proc()};
+}
+
+void proc_reset(spike_processor_t *proc) { proc->p->reset(); }
+
+spike_state_t *proc_get_state(spike_processor_t *proc) {
+  return new spike_state_t{proc->p->get_state()};
+}
+
+reg_t proc_func(spike_processor_t *proc) {
+  auto pc = proc->p->get_state()->pc;
+  auto mmu = proc->p->get_mmu();
+  auto fetch = mmu->load_insn(pc);
+  try {
+    return fetch.func(proc->p, fetch.insn, pc);
+  } catch (trap_t &trap) {
+    std::cerr << "Error: spike trapped with " << trap.name()
+              << " (tval=" << std::uppercase << std::setfill('0')
+              << std::setw(8) << std::hex << trap.get_tval()
+              << ", tval2=" << std::setw(8) << std::hex << trap.get_tval2()
+              << ", tinst=" << std::setw(8) << std::hex << trap.get_tinst()
+              << ")" << std::endl;
+    throw trap;
+  }
+}
+
+reg_t proc_get_insn(spike_processor_t *proc) {
+  auto pc = proc->p->get_state()->pc;
+  auto mmu = proc->p->get_mmu();
+  auto fetch = mmu->load_insn(pc);
+  return fetch.insn.bits();
+}
+
+uint8_t proc_get_vreg_data(spike_processor_t *proc, uint32_t vreg_idx,
+                           uint32_t vreg_offset) {
+  return proc->p->VU.elt<uint8_t>(vreg_idx, vreg_offset);
+}
+
+uint32_t extract_f32(freg_t f) { return (uint32_t)f.v[0]; }
+
+inline uint32_t clip(uint32_t binary, int a, int b) {
+  int nbits = b - a + 1;
+  uint32_t mask = nbits >= 32 ? (uint32_t)-1 : (1 << nbits) - 1;
+  return (binary >> a) & mask;
+}
+
+uint32_t proc_get_rs1(spike_processor_t *proc) {
+  auto pc = proc->p->get_state()->pc;
+  auto fetch = proc->p->get_mmu()->load_insn(pc);
+  return (uint32_t)fetch.insn.rs1();
+}
+
+uint32_t proc_get_rs2(spike_processor_t *proc) {
+  auto pc = proc->p->get_state()->pc;
+  auto fetch = proc->p->get_mmu()->load_insn(pc);
+  return (uint32_t)fetch.insn.rs2();
+}
+
+uint32_t proc_get_rd(spike_processor_t *proc) {
+  auto pc = proc->p->get_state()->pc;
+  auto fetch = proc->p->get_mmu()->load_insn(pc);
+  return fetch.insn.rd();
+}
+
+uint64_t proc_vu_get_vtype(spike_processor_t *proc) {
+  return proc->p->VU.vtype->read();
+}
+
+uint32_t proc_vu_get_vxrm(spike_processor_t *proc) {
+  return proc->p->VU.vxrm->read();
+}
+
+uint32_t proc_vu_get_vnf(spike_processor_t *proc) {
+  auto pc = proc->p->get_state()->pc;
+  auto fetch = proc->p->get_mmu()->load_insn(pc);
+  return fetch.insn.v_nf();
+}
+
+bool proc_vu_get_vill(spike_processor_t *proc) { return proc->p->VU.vill; }
+
+bool proc_vu_get_vxsat(spike_processor_t *proc) {
+  return proc->p->VU.vxsat->read();
+}
+
+uint32_t proc_vu_get_vl(spike_processor_t *proc) {
+  return proc->p->VU.vl->read();
+}
+
+uint16_t proc_vu_get_vstart(spike_processor_t *proc) {
+  return proc->p->VU.vstart->read();
+}
+
+reg_t state_get_pc(spike_state_t *state) { return state->s->pc; }
+
+void state_set_mcycle(spike_state_t *state, size_t mcycle) {
+  state->s->mcycle->write((int64_t)mcycle);
+}
+
+void state_clear(spike_state_t *state) {
+  state->s->log_reg_write.clear();
+  state->s->log_mem_read.clear();
+  state->s->log_mem_write.clear();
+}
+
+static void state_set_serialized(spike_state_t *state, bool serialized) {
+  state->s->serialized = serialized;
+}
+
+uint64_t state_handle_pc(spike_state_t *state, uint64_t new_pc) {
+  if ((new_pc & 1) == 0) {
+    state_set_pc(state, new_pc);
+  } else {
+    switch (new_pc) {
+    case PC_SERIALIZE_BEFORE:
+      state_set_serialized(state, true);
+      break;
+    case PC_SERIALIZE_AFTER:
+      break;
+    default:
+      return -1;
+    }
+  }
+  return 0;
+}
+
+void state_set_pc(spike_state_t *state, uint64_t pc) { state->s->pc = pc; }
+
+uint32_t state_get_reg(spike_state_t *state, uint32_t index, bool is_fp) {
+  if (is_fp) {
+    auto &fr = state->s->FPR;
+    return extract_f32(fr[index]);
+  }
+  auto &xr = state->s->XPR;
+  return (uint32_t)xr[index];
+}
+
+uint32_t state_get_reg_write_size(spike_state_t *state) {
+  reg_write_index_vec.clear();
+  for (auto [idx, data] : state->s->log_reg_write) {
+    reg_write_index_vec.push_back(idx);
+  }
+  return state->s->log_reg_write.size();
+}
+
+uint32_t state_get_reg_write_index(spike_state_t *state, uint32_t index) {
+  return reg_write_index_vec[index];
+}
+
+uint32_t state_get_mem_write_size(spike_state_t *state) {
+  return state->s->log_mem_write.size();
+}
+
+uint32_t state_get_mem_write_addr(spike_state_t *state, uint32_t index) {
+  return std::get<0>(state->s->log_mem_write[index]) & 0xffffffff;
+}
+
+uint64_t state_get_mem_write_value(spike_state_t *state, uint32_t index) {
+  return std::get<1>(state->s->log_mem_write[index]);
+}
+
+uint8_t state_get_mem_write_size_by_byte(spike_state_t *state, uint32_t index) {
+  return std::get<2>(state->s->log_mem_write[index]);
+}
+
+uint32_t state_get_mem_read_size(spike_state_t *state) {
+  return state->s->log_mem_read.size();
+}
+
+uint32_t state_get_mem_read_addr(spike_state_t *state, uint32_t index) {
+  return std::get<0>(state->s->log_mem_read[index]) & 0xffffffff;
+}
+
+uint8_t state_get_mem_read_size_by_byte(spike_state_t *state, uint32_t index) {
+  return std::get<2>(state->s->log_mem_read[index]);
+}
+
+reg_t state_exit(spike_state_t *state) {
+  auto &csrmap = state->s->csrmap;
+  return csrmap[CSR_MSIMEND]->read();
+}
+
+void spike_register_callback(void *ffi_target_, ffi_callback callback) {
+  ffi_addr_to_mem = callback;
+  ffi_target = ffi_target_;
+}
+
+void spike_destruct(spike_t *spike) { delete spike; }
+
+void proc_destruct(spike_processor_t *proc) { delete proc; }
+
+void state_destruct(spike_state_t *state) { delete state; }
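The custom CSR at `0x7cc` (`CSR_MSIMEND`) is the exit channel: the workload writes it, and the host observes it through `state_exit`. A hedged sketch of the host-side stepping loop this enables, written against the spike_rs wrappers defined later in this diff (the exit convention of "value changed from its reset value of 1" is an assumption here):

```rust
// Sketch: step Spike one instruction at a time until the workload writes
// CSR_MSIMEND (0x7cc). Relies on spike_rs::{Spike, Processor, State}.
fn run_until_exit(spike: &spike_rs::Spike) -> u64 {
    let proc = spike.get_proc();
    let state = proc.get_state();
    let reset_value = state.exit(); // CSR_MSIMEND is initialized to 1 above
    loop {
        state.clear();                    // drop the previous commit log
        let new_pc = proc.func();         // execute one instruction
        state.handle_pc(new_pc).unwrap(); // commit or serialize the new pc
        let value = state.exit();         // read CSR_MSIMEND back
        if value != reset_value {
            return value; // the workload wrote the exit CSR: we are done
        }
    }
}
```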
diff --git a/t1rocketemu/spike_interfaces/spike_interfaces.h b/t1rocketemu/spike_interfaces/spike_interfaces.h
new file mode 100644
index 000000000..ceffa8f7f
--- /dev/null
+++ b/t1rocketemu/spike_interfaces/spike_interfaces.h
@@ -0,0 +1,76 @@
+#ifndef __SPIKE_INTERFACES_H__
+#define __SPIKE_INTERFACES_H__
+
+#include "cfg.h"
+#include "decode_macros.h"
+#include "disasm.h"
+#include "mmu.h"
+#include "processor.h"
+#include "simif.h"
+#include "spike_interfaces_c.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Note: these two are definitions, not declarations, so this header is meant
+// to be included by exactly one translation unit (spike_interfaces.cc).
+ffi_callback ffi_addr_to_mem;
+extern void *ffi_target;
+std::vector<uint32_t> reg_write_index_vec;
+
+class sim_t : public simif_t {
+public:
+  sim_t() {}
+  ~sim_t() {}
+  char *addr_to_mem(reg_t addr) override {
+    return ffi_addr_to_mem(ffi_target, addr);
+  }
+  bool mmio_load(reg_t addr, size_t len, uint8_t *bytes) override {
+    throw std::logic_error("not implemented");
+  }
+  bool mmio_store(reg_t addr, size_t len, const uint8_t *bytes) override {
+    throw std::logic_error("not implemented");
+  }
+  virtual void proc_reset(unsigned id) override {}
+  virtual const char *get_symbol(uint64_t addr) override {
+    throw std::logic_error("not implemented");
+  }
+  [[nodiscard]] const cfg_t &get_cfg() const override {
+    throw std::logic_error("not implemented");
+  }
+  [[nodiscard]] const std::map<size_t, processor_t *> &
+  get_harts() const override {
+    throw std::logic_error("not implemented");
+  }
+};
+
+class Spike {
+public:
+  Spike(const char *set, const char *lvl, size_t lane_number);
+  processor_t *get_proc() { return &proc; }
+
+private:
+  cfg_t cfg;
+  sim_t sim;
+  isa_parser_t isa;
+  processor_t proc;
+};
+
+struct spike_t {
+  Spike *s;
+  ffi_callback ffi_addr_to_mem;
+};
+struct spike_processor_t {
+  processor_t *p;
+};
+struct spike_state_t {
+  state_t *s;
+};
+struct spike_mmu_t {
+  mmu_t *m;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __SPIKE_INTERFACES_H__
diff --git a/t1rocketemu/spike_interfaces/spike_interfaces_c.h b/t1rocketemu/spike_interfaces/spike_interfaces_c.h
new file mode 100644
index 000000000..6c43acaf0
--- /dev/null
+++ b/t1rocketemu/spike_interfaces/spike_interfaces_c.h
@@ -0,0 +1,65 @@
+#ifndef __SPIKE_INTERFACES_C_H__
+#define __SPIKE_INTERFACES_C_H__
+
+#include <stdbool.h> // bool
+#include <stddef.h>  // size_t
+#include <stdint.h>  // uint*_t
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef char *(*ffi_callback)(void *, uint64_t);
+
+typedef struct spike_t spike_t;
+typedef struct spike_processor_t spike_processor_t;
+typedef struct spike_state_t spike_state_t;
+
+void spike_register_callback(void *ffi_target, ffi_callback callback);
+spike_t *spike_new(const char *set, const char *lvl,
+                   size_t lane_number);
+const char *proc_disassemble(spike_processor_t *proc);
+void proc_reset(spike_processor_t *proc);
+spike_processor_t *spike_get_proc(spike_t *spike);
+spike_state_t *proc_get_state(spike_processor_t *proc);
+
+uint64_t proc_func(spike_processor_t *proc);
+uint64_t proc_get_insn(spike_processor_t *proc);
+uint8_t proc_get_vreg_data(spike_processor_t *proc, uint32_t vreg_idx,
+                           uint32_t vreg_offset);
+uint32_t proc_get_rs1(spike_processor_t *proc);
+uint32_t proc_get_rs2(spike_processor_t *proc);
+uint32_t proc_get_rd(spike_processor_t *proc);
+
+uint64_t proc_vu_get_vtype(spike_processor_t *proc);
+uint32_t proc_vu_get_vxrm(spike_processor_t *proc);
+uint32_t proc_vu_get_vnf(spike_processor_t *proc);
+bool proc_vu_get_vill(spike_processor_t *proc);
+bool proc_vu_get_vxsat(spike_processor_t *proc);
+uint32_t proc_vu_get_vl(spike_processor_t *proc);
+uint16_t proc_vu_get_vstart(spike_processor_t *proc);
+
+uint64_t state_get_pc(spike_state_t *state);
+uint64_t state_handle_pc(spike_state_t *state, uint64_t new_pc);
+void state_set_pc(spike_state_t *state, uint64_t pc);
+uint32_t state_get_reg(spike_state_t *state, uint32_t index, bool is_fp);
+uint32_t state_get_reg_write_size(spike_state_t *state);
+uint32_t state_get_reg_write_index(spike_state_t *state, uint32_t index);
+uint32_t state_get_mem_write_size(spike_state_t *state);
+uint32_t state_get_mem_write_addr(spike_state_t *state, uint32_t index);
+uint64_t state_get_mem_write_value(spike_state_t *state, uint32_t index);
+uint8_t state_get_mem_write_size_by_byte(spike_state_t *state, uint32_t index);
+uint32_t state_get_mem_read_size(spike_state_t *state);
+uint32_t state_get_mem_read_addr(spike_state_t *state, uint32_t index);
+uint8_t state_get_mem_read_size_by_byte(spike_state_t *state, uint32_t index);
+void state_set_mcycle(spike_state_t *state, size_t mcycle);
+void state_clear(spike_state_t *state);
+
+void spike_destruct(spike_t *spike);
+void proc_destruct(spike_processor_t *proc);
+void state_destruct(spike_state_t *state);
+uint64_t state_exit(spike_state_t *state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __SPIKE_INTERFACES_C_H__
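The C surface has a clear lifecycle: construct, register the memory callback, fetch sub-handles, step, then destruct each handle with its matching destructor. A condensed Rust-level sketch of that ordering (opaque handles erased to `*mut ()` as spike_rs does below; this compiles but only links when the staticlib is present, and the ISA/privilege strings are illustrative):

```rust
use std::ffi::{c_char, CString};

extern "C" {
    fn spike_new(set: *const c_char, lvl: *const c_char, lane_number: usize) -> *mut ();
    fn spike_get_proc(spike: *mut ()) -> *mut ();
    fn proc_get_state(proc: *mut ()) -> *mut ();
    fn proc_reset(proc: *mut ());
    fn state_destruct(state: *mut ());
    fn proc_destruct(proc: *mut ());
    fn spike_destruct(spike: *mut ());
}

fn lifecycle() {
    let set = CString::new("rv32gcv").unwrap(); // illustrative ISA string
    let lvl = CString::new("M").unwrap();       // illustrative privilege set
    unsafe {
        let spike = spike_new(set.as_ptr(), lvl.as_ptr(), 8);
        // spike_register_callback(...) must happen before executing anything
        let proc = spike_get_proc(spike);
        let state = proc_get_state(proc);
        proc_reset(proc);
        // ... drive proc_func / state_* accessors per retired instruction ...
        state_destruct(state); // each handle pairs with its own destructor
        proc_destruct(proc);
        spike_destruct(spike);
    }
}
```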
println!("cargo::rustc-link-lib=static=spike_interfaces"); + + println!("cargo::rerun-if-env-changed=SPIKE_LIB_DIR"); + println!("cargo::rerun-if-env-changed=SPIKE_INTERFACES_LIB_DIR"); + + println!("cargo::rustc-link-lib=stdc++"); +} diff --git a/t1rocketemu/spike_rs/src/lib.rs b/t1rocketemu/spike_rs/src/lib.rs new file mode 100644 index 000000000..0d64e6d24 --- /dev/null +++ b/t1rocketemu/spike_rs/src/lib.rs @@ -0,0 +1,287 @@ +pub mod spike_event; +pub mod util; + +use libc::c_char; +use std::ffi::{CStr, CString}; +use tracing::trace; + +pub fn clip(binary: u32, a: i32, b: i32) -> u32 { + assert!(a <= b, "a should be less than or equal to b"); + let nbits = b - a + 1; + let mask = if nbits >= 32 { + u32::MAX + } else { + (1 << nbits) - 1 + }; + (binary >> a) & mask +} + +pub struct Spike { + spike: *mut (), + pub mem: Vec, + pub size: usize, +} + +unsafe impl Send for Spike {} + +extern "C" fn default_addr_to_mem(target: *mut (), addr: u64) -> *mut u8 { + let spike = target as *mut Spike; + let addr = addr as usize; + unsafe { + let spike: &mut Spike = &mut *spike; + let ptr = spike.mem.as_mut_ptr().offset(addr as isize); + ptr + } +} + +type FfiCallback = extern "C" fn(*mut (), u64) -> *mut u8; + +impl Spike { + // we need to have a boxed SpikeCObject, since its pointer will be passed to C to perform FFI call + pub fn new(set: &str, lvl: &str, lane_number: usize, mem_size: usize) -> Box { + let set = CString::new(set).unwrap(); + let lvl = CString::new(lvl).unwrap(); + let spike = unsafe { spike_new(set.as_ptr(), lvl.as_ptr(), lane_number) }; + let mut self_: Box = Box::new(Spike { spike, mem: vec![0; mem_size], size: mem_size }); + + // TODO: support customized ffi + let ffi_target: *mut Spike = &mut *self_; + unsafe { + spike_register_callback(ffi_target as *mut (), default_addr_to_mem); + } + + self_ + } + + pub fn get_proc(&self) -> Processor { + let processor = unsafe { spike_get_proc(self.spike) }; + Processor { processor } + } + + pub fn load_bytes_to_mem( + &mut self, + addr: usize, + len: usize, + bytes: Vec, + ) -> anyhow::Result<()> { + trace!("ld: addr: 0x{:x}, len: 0x{:x}", addr, len); + assert!(addr + len <= self.size); + + let dst = &mut self.mem[addr..addr + len]; + for (i, byte) in bytes.iter().enumerate() { + dst[i] = *byte; + } + + Ok(()) + } + + pub fn mem_byte_on_addr(&self, addr: usize) -> anyhow::Result { + Ok(self.mem[addr]) + } +} + +impl Drop for Spike { + fn drop(&mut self) { + unsafe { spike_destruct(self.spike) } + } +} + +pub struct Processor { + processor: *mut (), +} + +impl Processor { + pub fn disassemble(&self) -> String { + let bytes = unsafe { proc_disassemble(self.processor) }; + let c_str = unsafe { CStr::from_ptr(bytes as *mut c_char) }; + format!("{}", c_str.to_string_lossy()) + } + + pub fn reset(&self) { + unsafe { proc_reset(self.processor) } + } + + pub fn get_state(&self) -> State { + let state = unsafe { proc_get_state(self.processor) }; + State { state } + } + + pub fn func(&self) -> u64 { + unsafe { proc_func(self.processor) } + } + + pub fn get_insn(&self) -> u32 { + unsafe { proc_get_insn(self.processor) as u32 } + } + + pub fn get_vreg_data(&self, idx: u32, offset: u32) -> u8 { + unsafe { proc_get_vreg_data(self.processor, idx, offset) } + } + + pub fn get_rs1(&self) -> u32 { + unsafe { proc_get_rs1(self.processor) } + } + + pub fn get_rs2(&self) -> u32 { + unsafe { proc_get_rs2(self.processor) } + } + + pub fn get_rd(&self) -> u32 { + unsafe { proc_get_rd(self.processor) } + } + + // vu + pub fn vu_get_vtype(&self) -> u32 { + unsafe { 
proc_vu_get_vtype(self.processor) as u32 } + } + + pub fn vu_get_vxrm(&self) -> u32 { + unsafe { proc_vu_get_vxrm(self.processor) } + } + + pub fn vu_get_vnf(&self) -> u32 { + unsafe { proc_vu_get_vnf(self.processor) } + } + + pub fn vu_get_vill(&self) -> bool { + unsafe { proc_vu_get_vill(self.processor) } + } + + pub fn vu_get_vxsat(&self) -> bool { + unsafe { proc_vu_get_vxsat(self.processor) } + } + + pub fn vu_get_vl(&self) -> u32 { + unsafe { proc_vu_get_vl(self.processor) } + } + + pub fn vu_get_vstart(&self) -> u16 { + unsafe { proc_vu_get_vstart(self.processor) } + } +} + +impl Drop for Processor { + fn drop(&mut self) { + unsafe { proc_destruct(self.processor) } + } +} + +pub struct State { + state: *mut (), +} + +impl State { + pub fn set_pc(&self, pc: u64) { + unsafe { state_set_pc(self.state, pc) } + } + + pub fn get_pc(&self) -> u64 { + unsafe { state_get_pc(self.state) } + } + + pub fn handle_pc(&self, pc: u64) -> anyhow::Result<()> { + match unsafe { state_handle_pc(self.state, pc) } { + 0 => Ok(()), + _ => Err(anyhow::anyhow!("Error handling pc")), + } + } + + pub fn get_reg(&self, idx: u32, is_fp: bool) -> u32 { + unsafe { state_get_reg(self.state, idx, is_fp) } + } + + pub fn get_reg_write_size(&self) -> u32 { + unsafe { state_get_reg_write_size(self.state) } + } + + pub fn get_reg_write_index(&self, index: u32) -> u32 { + unsafe { state_get_reg_write_index(self.state, index) } + } + + pub fn get_mem_write_size(&self) -> u32 { + unsafe { state_get_mem_write_size(self.state) } + } + + pub fn get_mem_write(&self, index: u32) -> (u32, u64, u8) { + let addr = unsafe { state_get_mem_write_addr(self.state, index) }; + let value = unsafe { state_get_mem_write_value(self.state, index) }; + let size_by_byte = unsafe { state_get_mem_write_size_by_byte(self.state, index) }; + (addr, value, size_by_byte) + } + + pub fn get_mem_read_size(&self) -> u32 { + unsafe { state_get_mem_read_size(self.state) } + } + + pub fn get_mem_read(&self, index: u32) -> (u32, u8) { + let addr = unsafe { state_get_mem_read_addr(self.state, index) }; + let size_by_byte = unsafe { state_get_mem_read_size_by_byte(self.state, index) }; + (addr, size_by_byte) + } + + pub fn set_mcycle(&self, mcycle: usize) { + unsafe { state_set_mcycle(self.state, mcycle) } + } + + pub fn clear(&self) { + unsafe { state_clear(self.state) } + } + + pub fn exit(&self) -> u64 { + unsafe { state_exit(self.state) } + } +} + +impl Drop for State { + fn drop(&mut self) { + unsafe { state_destruct(self.state) } + } +} + +#[link(name = "spike_interfaces")] +extern "C" { + pub fn spike_register_callback(target: *mut (), callback: FfiCallback); + fn spike_new( + set: *const c_char, + lvl: *const c_char, + lane_number: usize, + ) -> *mut (); + fn spike_get_proc(spike: *mut ()) -> *mut (); + fn spike_destruct(spike: *mut ()); + fn proc_disassemble(proc: *mut ()) -> *mut c_char; + fn proc_reset(proc: *mut ()); + fn proc_get_state(proc: *mut ()) -> *mut (); + fn proc_func(proc: *mut ()) -> u64; + fn proc_get_insn(proc: *mut ()) -> u64; + fn proc_get_vreg_data(proc: *mut (), vreg_idx: u32, vreg_offset: u32) -> u8; + fn proc_get_rs1(proc: *mut ()) -> u32; + fn proc_get_rs2(proc: *mut ()) -> u32; + fn proc_get_rd(proc: *mut ()) -> u32; + + fn proc_vu_get_vtype(proc: *mut ()) -> u64; + fn proc_vu_get_vxrm(proc: *mut ()) -> u32; + fn proc_vu_get_vnf(proc: *mut ()) -> u32; + fn proc_vu_get_vill(proc: *mut ()) -> bool; + fn proc_vu_get_vxsat(proc: *mut ()) -> bool; + fn proc_vu_get_vl(proc: *mut ()) -> u32; + fn proc_vu_get_vstart(proc: *mut ()) 
-> u16;
+
+  fn proc_destruct(proc: *mut ());
+  fn state_set_pc(state: *mut (), pc: u64);
+  fn state_get_pc(state: *mut ()) -> u64;
+  fn state_get_reg(state: *mut (), index: u32, is_fp: bool) -> u32;
+  fn state_get_reg_write_size(state: *mut ()) -> u32;
+  fn state_get_reg_write_index(state: *mut (), index: u32) -> u32;
+  fn state_get_mem_write_size(state: *mut ()) -> u32;
+  fn state_get_mem_write_addr(state: *mut (), index: u32) -> u32;
+  fn state_get_mem_write_value(state: *mut (), index: u32) -> u64;
+  fn state_get_mem_write_size_by_byte(state: *mut (), index: u32) -> u8;
+  fn state_get_mem_read_size(state: *mut ()) -> u32;
+  fn state_get_mem_read_addr(state: *mut (), index: u32) -> u32;
+  fn state_get_mem_read_size_by_byte(state: *mut (), index: u32) -> u8;
+  fn state_handle_pc(state: *mut (), pc: u64) -> u64;
+  fn state_set_mcycle(state: *mut (), mcycle: usize);
+  fn state_clear(state: *mut ());
+  fn state_destruct(state: *mut ());
+  fn state_exit(state: *mut ()) -> u64;
+}
diff --git a/t1rocketemu/spike_rs/src/spike_event.rs b/t1rocketemu/spike_rs/src/spike_event.rs
new file mode 100644
index 000000000..611f7156b
--- /dev/null
+++ b/t1rocketemu/spike_rs/src/spike_event.rs
@@ -0,0 +1,523 @@
+use std::collections::HashMap;
+use tracing::trace;
+
+use crate::clip;
+use crate::Spike;
+
+#[derive(Debug, Clone)]
+pub struct SingleMemWrite {
+  pub val: u8,
+  pub executed: bool, // set to true once the RTL has performed this memory access
+}
+
+#[derive(Debug, Clone)]
+pub struct SingleMemRead {
+  pub val: u8,
+  pub executed: bool, // set to true once the RTL has performed this memory access
+}
+
+#[derive(Debug, Clone)]
+pub struct MemWriteRecord {
+  pub writes: Vec<SingleMemWrite>,
+  pub num_completed_writes: usize,
+}
+
+#[derive(Debug, Clone)]
+pub struct MemReadRecord {
+  pub reads: Vec<SingleMemRead>,
+  pub num_completed_reads: usize,
+}
+
+#[derive(Debug, Clone)]
+pub struct SingleVrfWrite {
+  pub byte: u8,
+  pub executed: bool, // set to true once the RTL has performed this VRF write
+}
+
+#[derive(Default, Debug, Clone)]
+pub struct VdWriteRecord {
+  vd_bytes: Vec<u8>,
+}
+
+#[derive(Default, Debug, Clone)]
+pub struct MemAccessRecord {
+  pub all_writes: HashMap<u32, MemWriteRecord>,
+  pub all_reads: HashMap<u32, MemReadRecord>,
+}
+
+#[derive(Default, Debug, Clone)]
+pub struct VrfAccessRecord {
+  pub all_writes: HashMap<usize, SingleVrfWrite>,
+  pub unretired_writes: Option<u32>,
+  pub retired_writes: u32,
+}
+
+pub const LSU_IDX_DEFAULT: u8 = 0xff;
+pub const ISSUE_IDX_DEFAULT: u8 = 0xff;
+
+#[derive(Default, Debug, Clone)]
+pub struct SpikeEvent {
+  pub do_log_vrf: bool,
+
+  // index
+  pub lsu_idx: u8,
+  pub issue_idx: u8,
+
+  // instruction
+  pub disasm: String,
+  pub pc: u64,
+  pub inst_bits: u32,
+
+  // scalar-to-vector interface (used by the driver)
+  pub rs1: u32,
+  pub rs2: u32,
+  pub rs1_bits: u32,
+  pub rs2_bits: u32,
+  pub rd_idx: u32,
+
+  // vtype
+  pub vtype: u32,
+  pub vxrm: u32,
+  pub vnf: u32,
+
+  // other CSRs
+  pub vill: bool,
+  pub vxsat: bool,
+  pub vl: u32,
+  pub vstart: u16,
+
+  // rd
+  pub rd_bits: u32,
+
+  // mutable states
+  pub is_rd_written: bool,
+  pub vd_write_record: VdWriteRecord,
+  pub mem_access_record: MemAccessRecord,
+  pub vrf_access_record: VrfAccessRecord,
+}
+
+impl SpikeEvent {
+  pub fn new(spike: &Spike, do_log_vrf: bool) -> Self {
+    let proc = spike.get_proc();
+    let state = proc.get_state();
+    let inst_bits = proc.get_insn();
+
+    let opcode = clip(inst_bits, 0, 6);
+    let width = clip(inst_bits, 12, 14);
+
+    let is_rs_fp = opcode == 0b1010111 && width == 0b101 /* OPFVF */;
+    // note: vsetvl is decoded as a scalar instruction (see is_vsetvl below)
+
+    // rs1, rs2
+    let (rs1, rs2) = (proc.get_rs1(),
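+      // rs1/rs2 here are register *indices* decoded from the fetched
+      // instruction; the operand values are captured separately in
+      // rs1_bits/rs2_bits below.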
proc.get_rs2()); + + SpikeEvent { + do_log_vrf, + + lsu_idx: LSU_IDX_DEFAULT, + issue_idx: ISSUE_IDX_DEFAULT, + + disasm: spike.get_proc().disassemble(), + pc: proc.get_state().get_pc(), + inst_bits, + + rs1, + rs2, + rs1_bits: state.get_reg(rs1, is_rs_fp), + rs2_bits: state.get_reg(rs2, is_rs_fp), + rd_idx: proc.get_rd(), + + vtype: proc.vu_get_vtype(), + vxrm: proc.vu_get_vxrm(), + vnf: proc.vu_get_vnf(), + + vill: proc.vu_get_vill(), + vxsat: proc.vu_get_vxsat(), + vl: proc.vu_get_vl(), + vstart: proc.vu_get_vstart(), + + rd_bits: Default::default(), + + is_rd_written: false, + vd_write_record: Default::default(), + mem_access_record: Default::default(), + vrf_access_record: Default::default(), + } + } + + pub fn opcode(&self) -> u32 { + clip(self.inst_bits, 0, 6) + } + + pub fn width(&self) -> u32 { + clip(self.inst_bits, 12, 14) + } + + pub fn rs1(&self) -> u32 { + clip(self.inst_bits, 15, 19) + } + + pub fn csr(&self) -> u32 { + clip(self.inst_bits, 20, 31) + } + + pub fn funct6(&self) -> u32 { + clip(self.inst_bits, 26, 31) + } + + pub fn mop(&self) -> u32 { + clip(self.inst_bits, 26, 27) + } + + pub fn lumop(&self) -> u32 { + clip(self.inst_bits, 20, 24) + } + + pub fn vm(&self) -> bool { + clip(self.inst_bits, 25, 25) != 0 + } + + // check whether the instruction is a vector load + pub fn is_vload(&self) -> bool { + self.opcode() == 0b0000111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + // check whether the instruction is a vector store + pub fn is_vstore(&self) -> bool { + self.opcode() == 0b0100111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + pub fn is_v(&self) -> bool { + (self.opcode() == 0b1010111 || self.is_vload() || self.is_vstore()) && !self.is_vsetvl() + } + + pub fn is_vsetvl(&self) -> bool { + self.opcode() == 0b1010111 && self.width() == 0b111 + } + + pub fn is_scalar(&self) -> bool { + !self.is_v() + } + + // check whether the instruction is a scalar load + pub fn is_load(&self) -> bool { + self.opcode() == 0b0000011 || self.is_cl() + } + + // check whether the instruction is a scalar store + pub fn is_store(&self) -> bool { + self.opcode() == 0b0100011 || self.is_cw() + } + + pub fn is_whole(&self) -> bool { + self.mop() == 0 && self.lumop() == 8 + } + + pub fn is_widening(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_mask_vd(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_exit(&self) -> bool { + let is_csr_type = self.opcode() == 0b1110011 && ((self.width() & 0b011) != 0); + let is_csr_write = is_csr_type && (((self.width() & 0b100) | self.rs1()) != 0); + + is_csr_write && self.csr() == 0x7cc + } + + pub fn is_vfence(&self) -> bool { + self.is_exit() // only exit instruction is treated as fence now + } + + pub fn is_rd_fp(&self) -> bool { + (self.opcode() == 0b1010111) + && (self.rs1 == 0) + && (self.funct6() == 0b010000) + && self.vm() + && (self.width() == 0b001) + } + + pub fn c_op(&self) -> u32 { + clip(self.inst_bits, 0, 1) + } + + pub fn c_func3(&self) -> u32 { + clip(self.inst_bits, 13, 15) + } + + pub fn is_cl(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 == 0 ) || /* c.lw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 == 0 ) /* c.lwsp */ + } + + pub fn is_cw(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 != 0 ) || /* c.sw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 != 0 ) /* c.swsp */ + } + + pub fn vlmul(&self) -> u32 { + clip(self.vtype, 0, 2) + } + + pub fn vma(&self) -> bool { + 
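+    // vtype[7] is vma (mask-agnostic policy); vta below reads vtype[6]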
clip(self.vtype, 7, 7) != 0
+  }
+
+  pub fn vta(&self) -> bool {
+    clip(self.vtype, 6, 6) != 0
+  }
+
+  pub fn vsew(&self) -> u32 {
+    clip(self.vtype, 3, 5)
+  }
+
+  pub fn vcsr(&self) -> u32 {
+    self.vxsat as u32 | self.vxrm << 1
+  }
+
+  pub fn describe_insn(&self) -> String {
+    format!(
+      "pc={:#x}, disasm='{}', bits={:#x}",
+      self.pc, self.disasm, self.inst_bits
+    )
+  }
+
+  pub fn get_vrf_write_range(&self, vlen_in_bytes: u32) -> anyhow::Result<(u32, u32)> {
+    if self.is_vstore() {
+      return Ok((0, 0));
+    }
+
+    if self.is_vload() {
+      let vd_bytes_start = self.rd_idx * vlen_in_bytes;
+      if self.is_whole() {
+        return Ok((vd_bytes_start, vlen_in_bytes * (1 + self.vnf)));
+      }
+      let len = if self.vlmul() & 0b100 != 0 {
+        vlen_in_bytes * (1 + self.vnf)
+      } else {
+        (vlen_in_bytes * (1 + self.vnf)) << self.vlmul()
+      };
+      return Ok((vd_bytes_start, len));
+    }
+
+    let vd_bytes_start = self.rd_idx * vlen_in_bytes;
+
+    if self.is_mask_vd() {
+      return Ok((vd_bytes_start, vlen_in_bytes));
+    }
+
+    let len = if self.vlmul() & 0b100 != 0 {
+      vlen_in_bytes >> (8 - self.vlmul())
+    } else {
+      vlen_in_bytes << self.vlmul()
+    };
+
+    Ok((
+      vd_bytes_start,
+      if self.is_widening() { len * 2 } else { len },
+    ))
+  }
+
+  pub fn pre_log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> {
+    if self.do_log_vrf {
+      self.rd_bits = spike.get_proc().get_rd();
+
+      // record the vrf writes before executing the insn;
+      // vlen is in bits, so convert to bytes to match log_vrf_write below
+      let vlen_in_bytes = vlen / 8;
+
+      let proc = spike.get_proc();
+      let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap();
+      self.vd_write_record.vd_bytes.resize(len as usize, 0u8);
+      for i in 0..len {
+        let offset = start + i;
+        let vreg_index = offset / vlen_in_bytes;
+        let vreg_offset = offset % vlen_in_bytes;
+        let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset);
+        self.vd_write_record.vd_bytes[i as usize] = cur_byte;
+      }
+    }
+
+    Ok(())
+  }
+
+  pub fn log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> {
+    if self.do_log_vrf {
+      self.log_vrf_write(spike, vlen).unwrap();
+      self.log_reg_write(spike).unwrap();
+    }
+    self.log_mem_write(spike).unwrap();
+    self.log_mem_read(spike).unwrap();
+
+    Ok(())
+  }
+
+  fn log_vrf_write(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> {
+    let proc = spike.get_proc();
+    // record vrf writes
+    // note that we do not need log_reg_write to find records, we just decode the
+    // insn and compare bytes
+    let vlen_in_bytes = vlen / 8;
+    let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap();
+    trace!("vrf write range: start: {start}, len: {len}");
+    for i in 0..len {
+      let offset = start + i;
+      let origin_byte = self.vd_write_record.vd_bytes[i as usize];
+      let vreg_index = offset / vlen_in_bytes;
+      let vreg_offset = offset % vlen_in_bytes;
+      let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset);
+      if origin_byte != cur_byte {
+        self
+          .vrf_access_record
+          .all_writes
+          .entry(offset as usize)
+          .or_insert(SingleVrfWrite { byte: cur_byte, executed: false });
+        trace!(
+          "SpikeVRFChange: vrf={:?}, change_from={origin_byte}, change_to={cur_byte}, vrf_idx={offset}",
+          vec![offset / vlen_in_bytes, offset % vlen_in_bytes],
+        );
+      } else {
+        trace!(
+          "SpikeVRFChange: vrf={:?}, change_from={origin_byte}, not changed, vrf_idx={offset}",
+          vec![offset / vlen_in_bytes, offset % vlen_in_bytes],
+        );
+      }
+    }
+    Ok(())
+  }
+
+  fn log_reg_write(&mut self, spike: &Spike) -> anyhow::Result<()> {
+    let proc = spike.get_proc();
+    let state = proc.get_state();
+    // in spike, log_reg_write entries are arranged as:
+    //   xx0000 <- x
+    //   xx0001 <- f
+    //   xx0010 <- vreg
+    //   xx0011 <- vec
+    //   xx0100 <- csr
+    let reg_write_size = state.get_reg_write_size();
+    // TODO: refactor it.
+    (0..reg_write_size).for_each(|idx| match state.get_reg_write_index(idx) & 0xf {
+      0b0000 => {
+        // scalar rf
+        let data = state.get_reg(self.rd_idx, false);
+        self.is_rd_written = true;
+        if data != self.rd_bits {
+          trace!(
+            "ScalarRFChange: idx={}, change_from={}, change_to={data}",
+            self.rd_idx,
+            self.rd_bits
+          );
+          self.rd_bits = data;
+        }
+      }
+      0b0001 => {
+        let data = state.get_reg(self.rd_idx, true);
+        self.is_rd_written = true;
+        if data != self.rd_bits {
+          trace!(
+            "FloatRFChange: idx={}, change_from={}, change_to={data}",
+            self.rd_idx,
+            self.rd_bits
+          );
+          self.rd_bits = data;
+        }
+      }
+      _ => trace!(
+        "UnknownRegChange, idx={}, spike detected an unknown reg change",
+        state.get_reg_write_index(idx)
+      ),
+    });
+
+    Ok(())
+  }
+
+  pub fn log_mem_write(&mut self, spike: &Spike) -> anyhow::Result<()> {
+    let proc = spike.get_proc();
+    let state = proc.get_state();
+
+    let mem_write_size = state.get_mem_write_size();
+    (0..mem_write_size).for_each(|i| {
+      let (addr, value, size) = state.get_mem_write(i);
+      (0..size).for_each(|offset| {
+        self
+          .mem_access_record
+          .all_writes
+          .entry(addr + offset as u32)
+          .or_insert(MemWriteRecord { writes: vec![], num_completed_writes: 0 })
+          .writes
+          .push(SingleMemWrite {
+            val: (value >> (offset * 8)) as u8,
+            executed: false,
+          });
+      });
+      trace!("SpikeMemWrite: addr={addr:x}, value={value:x}, size={size}");
+    });
+
+    Ok(())
+  }
+
+  fn log_mem_read(&mut self, spike: &Spike) -> anyhow::Result<()> {
+    let proc = spike.get_proc();
+    let state = proc.get_state();
+
+    let mem_read_size = state.get_mem_read_size();
+    (0..mem_read_size).for_each(|i| {
+      let (addr, size) = state.get_mem_read(i);
+      let mut value = 0;
+      (0..size).for_each(|offset| {
+        let byte = spike.mem_byte_on_addr(addr as usize + offset as usize).unwrap();
+        value |= (byte as u64) << (offset * 8);
+        // record the read
+        self
+          .mem_access_record
+          .all_reads
+          .entry(addr + offset as u32)
+          .or_insert(MemReadRecord { reads: vec![], num_completed_reads: 0 })
+          .reads
+          .push(SingleMemRead { val: byte, executed: false });
+      });
+      trace!("SpikeMemRead: addr={addr:08x}, value={value:08x}, size={size}");
+    });
+
+    Ok(())
+  }
+
+  pub fn check_rd(&self, data: u32) -> anyhow::Result<()> {
+    // TODO: rtl should indicate whether resp_bits_data is valid
+    if self.is_rd_written {
+      assert_eq!(
+        data, self.rd_bits,
+        "expect to write rd[{}] = {}, actual {}",
+        self.rd_idx, self.rd_bits, data
+      );
+    }
+
+    Ok(())
+  }
+
+  pub fn check_is_ready_for_commit(&self, cycle: u64) -> anyhow::Result<()> {
+    for (addr, record) in &self.mem_access_record.all_writes {
+      assert_eq!(
+        record.num_completed_writes,
+        record.writes.len(),
+        "[{cycle}] expect to write mem {addr:#x}, not executed when commit, issue_idx={} ({})",
+        self.issue_idx,
+        self.describe_insn(),
+      );
+    }
+    for (idx, record) in &self.vrf_access_record.all_writes {
+      assert!(
+        record.executed,
+        "[{cycle}] expect to write vrf {idx}, not executed when commit, issue_idx={} ({})",
+        self.issue_idx,
+        self.describe_insn()
+      );
+    }
+
+    Ok(())
+  }
+}
diff --git a/t1rocketemu/spike_rs/src/util.rs b/t1rocketemu/spike_rs/src/util.rs
new file mode 100644
index 000000000..6ded0eec5
--- /dev/null
+++ b/t1rocketemu/spike_rs/src/util.rs
@@ -0,0 +1,65 @@
+use crate::Spike;
+use std::fs::File;
+use std::io::Read;
+use std::path::Path;
+use xmas_elf::program::{ProgramHeader, Type};
+use xmas_elf::{header,
ElfFile}; + +pub fn load_elf(spike: &mut Spike, fname: &Path) -> anyhow::Result<u64> { + let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + spike.load_bytes_to_mem(addr, size, slice.to_vec()).unwrap(); + } + } + } + + Ok(header.pt2.entry_point()) +} + +// todo: unify load_elf and load_elf_to_buffer +pub fn load_elf_to_buffer(mem: &mut [u8], fname: &Path) -> anyhow::Result<u64> { + let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + + let dst: &mut _ = &mut mem[addr..addr + size]; + for (i, byte) in slice.iter().enumerate() { + dst[i] = *byte; + } + } + } + } + + Ok(header.pt2.entry_point()) +} diff --git a/t1rocketemu/src/AXI4SlaveAgent.scala b/t1rocketemu/src/AXI4SlaveAgent.scala new file mode 100644 index 000000000..74da15a64 --- /dev/null +++ b/t1rocketemu/src/AXI4SlaveAgent.scala @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022-2024 Jiuyang Liu + +package org.chipsalliance.t1.t1rocketemu.dpi + +// TODO: upstream to AMBA as VIP +import chisel3._ +import chisel3.util.circt.dpi.{RawClockedVoidFunctionCall, RawUnclockedNonVoidFunctionCall} +import chisel3.util.{isPow2, log2Ceil} +import org.chipsalliance.amba.axi4.bundle.{ARChannel, ARFlowControl, AWChannel, AWFlowControl, AXI4BundleParameter, AXI4ROIrrevocableVerilog, AXI4RWIrrevocableVerilog, AXI4WOIrrevocableVerilog, BChannel, BFlowControl, RChannel, RFlowControl, WChannel, WFlowControl} + +case class AXI4SlaveAgentParameter(name: String, axiParameter: AXI4BundleParameter, outstanding: Int, readPayloadSize: Int, writePayloadSize: Int) + +class AXI4SlaveAgentInterface(parameter: AXI4SlaveAgentParameter) extends Bundle { + val clock: Clock = Input(Clock()) + val reset: Reset = Input(Reset()) + val channelId: UInt = Input(Const(UInt(64.W))) + // don't issue read DPI + val gateRead: Bool = Input(Bool()) + // don't issue write DPI + val gateWrite: Bool = Input(Bool()) + val channel = Flipped( + org.chipsalliance.amba.axi4.bundle.verilog.irrevocable(parameter.axiParameter) + ) +} + +class WritePayload(length: Int, dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) + // For dataWidth <= 8, align strb to u8 for a simple C-API + val strb = Vec(length, UInt(math.max(8, dataWidth / 8).W)) +} + +class ReadPayload(length: Int, dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) +} + +// consume transaction from DPI, drive RTL signal +class AXI4SlaveAgent(parameter:
AXI4SlaveAgentParameter) + extends FixedIORawModule[AXI4SlaveAgentInterface](new AXI4SlaveAgentInterface(parameter)) { + dontTouch(io) + io.channel match { + case channel: AXI4RWIrrevocableVerilog => + new WriteManager(channel) + new ReadManager(channel) + case channel: AXI4ROIrrevocableVerilog => + new ReadManager(channel) + case channel: AXI4WOIrrevocableVerilog => + new WriteManager(channel) + } + + private class WriteManager( + channel: AWChannel with AWFlowControl with WChannel with WFlowControl with BChannel with BFlowControl) { + withClockAndReset(io.clock, io.reset) { + /** There is an aw in the register. */ + val awIssued = RegInit(false.B) + /** There is a w in the register. */ + val last = RegInit(false.B) + + /** memory to store the write payload + * @todo limit the payload size based on the RTL configuration. + */ + val writePayload = RegInit(0.U.asTypeOf(new WritePayload(parameter.writePayloadSize, parameter.axiParameter.dataWidth))) + /** AWID, latch at AW fire, used at B fire. */ + val awid = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWID))) + val awaddr = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWADDR))) + val awlen = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLEN))) + val awsize = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWSIZE))) + val awburst = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWBURST))) + val awlock = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLOCK))) + val awcache = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWCACHE))) + val awprot = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWPROT))) + val awqos = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWQOS))) + val awregion = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWREGION))) + val awuser = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWUSER))) + + /** index the payload, used to write [[writePayload]] */ + val writeIdx = RegInit(0.U.asTypeOf(UInt(8.W))) + val bFire = channel.BREADY && channel.BVALID + val awFire = channel.AWREADY && channel.AWVALID + val wLastFire = channel.WVALID && channel.WREADY && channel.WLAST + val awExist = channel.AWVALID || awIssued + val wExist = channel.WVALID && channel.WLAST || last + + // AW + channel.AWREADY := !awIssued || (wExist && channel.BREADY) + when(channel.AWREADY && channel.AWVALID) { + awid := channel.AWID + awaddr := channel.AWADDR + awlen := channel.AWLEN + awsize := channel.AWSIZE + awburst := channel.AWBURST + awlock := channel.AWLOCK + awcache := channel.AWCACHE + awprot := channel.AWPROT + awqos := channel.AWQOS + awregion := channel.AWREGION + awuser := channel.AWUSER + } + when(awFire ^ bFire) { + awIssued := awFire + } + + // W + val writePayloadUpdate = WireDefault(writePayload) + channel.WREADY := !last || (awExist && channel.BREADY) + when(channel.WVALID && channel.WREADY) { + writePayload.data(writeIdx) := channel.WDATA + writePayloadUpdate.data(writeIdx) := channel.WDATA + writePayload.strb(writeIdx) := channel.WSTRB.pad(writePayload.strb.getWidth) + writePayloadUpdate.strb(writeIdx) := channel.WSTRB.pad(writePayload.strb.getWidth) + writeIdx := writeIdx + 1.U + when(channel.WLAST) { + writeIdx := 0.U + } + } + when(wLastFire ^ bFire) { + last := wLastFire + } + + // B + channel.BVALID := awExist && wExist + channel.BID := Mux(awIssued, awid, channel.AWID) + channel.BRESP := 0.U(2.W) // OK + channel.BUSER := Mux(awIssued, awuser, channel.AWUSER) + when(channel.BVALID && channel.BREADY) { + RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( + io.clock, + when.cond && !io.gateWrite, + io.channelId, + // handle AW and W at same beat. 
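+ // when awIssued is false, AW is firing in this very beat, so the latched aw* registers do not hold this transaction yet and the fields are taken from the live channel instead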
+ Mux(awIssued, awid.asTypeOf(UInt(64.W)), channel.AWID), + Mux(awIssued, awaddr.asTypeOf(UInt(64.W)), channel.AWADDR), + Mux(awIssued, awlen.asTypeOf(UInt(64.W)), channel.AWLEN), + Mux(awIssued, awsize.asTypeOf(UInt(64.W)), channel.AWSIZE), + Mux(awIssued, awburst.asTypeOf(UInt(64.W)), channel.AWBURST), + Mux(awIssued, awlock.asTypeOf(UInt(64.W)), channel.AWLOCK), + Mux(awIssued, awcache.asTypeOf(UInt(64.W)), channel.AWCACHE), + Mux(awIssued, awprot.asTypeOf(UInt(64.W)), channel.AWPROT), + Mux(awIssued, awqos.asTypeOf(UInt(64.W)), channel.AWQOS), + Mux(awIssued, awregion.asTypeOf(UInt(64.W)), channel.AWREGION), + writePayloadUpdate + ) + } + } + } + + private class ReadManager(channel: ARChannel with ARFlowControl with RChannel with RFlowControl) { + withClockAndReset(io.clock, io.reset) { + class CAMValue extends Bundle { + val arid = UInt(16.W) + val arlen = UInt(8.W) + val readPayload = new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth) + val readPayloadIndex = UInt(8.W) + val valid = Bool() + } + /** CAM to maintain order of read requests. This is maintained as FIFO. */ + val cam: Vec[CAMValue] = RegInit(0.U.asTypeOf(Vec(parameter.outstanding, new CAMValue))) + require(isPow2(parameter.outstanding), "Need to handle pointers") + val arPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) + val rPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) + + // AR + channel.ARREADY := !cam(arPtr).valid + when(channel.ARREADY && channel.ARVALID) { + cam(arPtr).arid := channel.ARID + cam(arPtr).arlen := channel.ARLEN + cam(arPtr).readPayload := RawUnclockedNonVoidFunctionCall(s"axi_read_${parameter.name}", new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth))( + when.cond && !io.gateRead, + io.channelId, + channel.ARID.asTypeOf(UInt(64.W)), + channel.ARADDR.asTypeOf(UInt(64.W)), + channel.ARLEN.asTypeOf(UInt(64.W)), + channel.ARSIZE.asTypeOf(UInt(64.W)), + channel.ARBURST.asTypeOf(UInt(64.W)), + channel.ARLOCK.asTypeOf(UInt(64.W)), + channel.ARCACHE.asTypeOf(UInt(64.W)), + channel.ARPROT.asTypeOf(UInt(64.W)), + channel.ARQOS.asTypeOf(UInt(64.W)), + channel.ARREGION.asTypeOf(UInt(64.W)) + ) + cam(arPtr).readPayloadIndex := 0.U + cam(arPtr).valid := true.B + arPtr := arPtr + 1.U + } + + // R + channel.RVALID := cam(rPtr).valid + channel.RID := cam(rPtr).arid + channel.RDATA := cam(rPtr).readPayload.data(cam(rPtr).readPayloadIndex) + channel.RRESP := 0.U // OK + channel.RLAST := (cam(rPtr).arlen === cam(rPtr).readPayloadIndex) && cam(rPtr).valid + channel.RUSER := DontCare + when(channel.RREADY && channel.RVALID) { + // increase index + cam(rPtr).readPayloadIndex := cam(rPtr).readPayloadIndex + 1.U + when(channel.RLAST) { + cam(rPtr).valid := false.B + rPtr := rPtr + 1.U + } + } + } + } +} diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala new file mode 100644 index 000000000..aa72472c5 --- /dev/null +++ b/t1rocketemu/src/TestBench.scala @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.t1rocketemu + +import chisel3._ +import chisel3.experimental.{BaseModule, ExtModule, SerializableModuleGenerator} +import chisel3.experimental.dataview.DataViewable +import chisel3.util.circt.dpi.RawUnclockedNonVoidFunctionCall +import chisel3.util.{HasExtModuleInline, PopCount, UIntToOH, Valid} +import org.chipsalliance.amba.axi4.bundle._ +import org.chipsalliance.t1.t1rocketemu.dpi._ +import 
org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter} + +class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTileParameter]) + extends RawModule + with ImplicitClock + with ImplicitReset { + val clockGen = Module(new ExtModule with HasExtModuleInline { + override def desiredName = "ClockGen" + setInline( + s"$desiredName.sv", + s"""module $desiredName(output reg clock, output reg reset); + | export "DPI-C" function dump_wave; + | function dump_wave(input string file); + |`ifdef VCS + | $$fsdbDumpfile(file); + | $$fsdbDumpvars("+all"); + | $$fsdbDumpon; + |`endif + |`ifdef VERILATOR + | $$dumpfile(file); + | $$dumpvars(0); + |`endif + | endfunction; + | + | export "DPI-C" function quit; + | function quit(); + | $$finish; + | endfunction; + | + | import "DPI-C" context function void t1rocket_cosim_init(); + | initial begin + | t1rocket_cosim_init(); + | clock = 1'b0; + | reset = 1'b1; + | end + | initial #(11) reset = 1'b0; + | always #10 clock = ~clock; + |endmodule + |""".stripMargin + ) + val clock = IO(Output(Bool())) + val reset = IO(Output(Bool())) + }) + def clock = clockGen.clock.asClock + def reset = clockGen.reset + override def implicitClock = clockGen.clock.asClock + override def implicitReset = clockGen.reset + val dut: T1RocketTile with BaseModule = Module(generator.module()) + dut.io.clock := clock + dut.io.reset := reset + + // control simulation + val simulationTime: UInt = RegInit(0.U(64.W)) + simulationTime := simulationTime + 1.U + + // this initial way cannot happen before reset + val initFlag: Bool = RegInit(false.B) + when(!initFlag) { + initFlag := true.B + printf(cf"""{"event":"SimulationStart","cycle":${simulationTime}}\n""") + } + val watchdog: UInt = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) + when(watchdog =/= 0.U) { + stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") + } + + // get resetVector from simulator + dut.io.resetVector := RawUnclockedNonVoidFunctionCall("get_resetvector", Const(UInt(64.W)))(simulationTime === 0.U) + + dut.io.hartid := 0.U + dut.io.debug := 0.U + dut.io.mtip := 0.U + dut.io.msip := 0.U + dut.io.meip := 0.U + dut.io.buserror := 0.U + + // memory driver + Seq( + dut.io.highBandwidthAXI, // index 0 + dut.io.highOutstandingAXI // index 1 + ).map(_.viewAs[AXI4RWIrrevocableVerilog]) + .lazyZip( + Seq("highBandwidthAXI", "highOutstandingAXI") + ) + .zipWithIndex + .foreach { + case ((bundle: AXI4RWIrrevocableVerilog, channelName: String), index: Int) => + val agent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = channelName, + axiParameter = bundle.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) + ) + ).suggestName(s"axi4_channel${index}_${channelName}") + agent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> bundle + } + agent.io.clock := clock + agent.io.reset := reset + agent.io.channelId := index.U + agent.io.gateRead := false.B + agent.io.gateWrite := false.B + } + + val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] + val instFetchAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = "instructionFetchAXI", + axiParameter = instFetchAXI.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) + ).suggestName("axi4_channel2_instructionFetchAXI") + ) + instFetchAgent.io.channel match { + case io: AXI4ROIrrevocableVerilog => io <> instFetchAXI + } + instFetchAgent.io.clock := clock 
+ instFetchAgent.io.reset := reset + instFetchAgent.io.channelId := 0.U + instFetchAgent.io.gateRead := false.B + instFetchAgent.io.gateWrite := false.B + + val loadStoreAXI = dut.io.loadStoreAXI.viewAs[AXI4RWIrrevocableVerilog] + val loadStoreAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = "loadStoreAXI", + axiParameter = loadStoreAXI.parameter, + outstanding = 4, + // TODO: add payloadSize config to parameter + readPayloadSize = 8, // todo: align with parameter in the future + writePayloadSize = 8 + ) + ).suggestName("axi4_channel3_loadStoreAXI") + ) + loadStoreAgent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> loadStoreAXI + } + loadStoreAgent.io.clock := clock + loadStoreAgent.io.reset := reset + loadStoreAgent.io.channelId := 3.U + loadStoreAgent.io.gateRead := false.B + loadStoreAgent.io.gateWrite := false.B + + // probes + val t1RocketProbe = probe.read(dut.io.t1RocketProbe) + val rocketProbe = t1RocketProbe.rocketProbe.suggestName(s"rocketProbe") + val t1Probe = t1RocketProbe.t1Probe.suggestName(s"t1Probe") + val lsuProbe = t1Probe.lsuProbe.suggestName(s"t1LSUProbe") + val laneProbes = t1Probe.laneProbes.zipWithIndex.map { + case (p, idx) => + val wire = WireDefault(p).suggestName(s"lane${idx}Probe") + wire + } + val laneVrfProbes = t1Probe.laneProbes.map(_.vrfProbe).zipWithIndex.map { + case (p, idx) => + val wire = WireDefault(p).suggestName(s"lane${idx}VrfProbe") + wire + } + val storeUnitProbe = t1Probe.lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") + val otherUnitProbe = t1Probe.lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") + + // output the probes + // rocket reg write + when(rocketProbe.rfWen)( + printf( + cf"""{"event":"RegWrite","idx":${rocketProbe.rfWaddr},"data":"${rocketProbe.rfWdata}%x","cycle":${simulationTime}}\n""" + ) + ) + + // t1 vrf write + laneVrfProbes.zipWithIndex.foreach { + case (lane, i) => + when(lane.valid)( + printf( + cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" + ) + ) + } + + // t1 memory write from store unit + when(storeUnitProbe.valid)( + printf( + cf"""{"event":"MemoryWrite","lsu_idx":${storeUnitProbe.index},"mask":"${storeUnitProbe.mask}%x","data":"${storeUnitProbe.data}%x","address":"${storeUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + + // t1 memory write from other unit + when(otherUnitProbe.valid)( + printf( + cf"""{"event":"MemoryWrite","lsu_idx":${otherUnitProbe.index},"mask":"${otherUnitProbe.mask}%x","data":"${otherUnitProbe.data}%x","address":"${otherUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + + // t1 issue + when(t1Probe.issue.valid)( + printf(cf"""{"event":"Issue","idx":${t1Probe.issue.bits},"cycle":${simulationTime}}\n""") + ) + + // t1 retire + when(t1Probe.retire.valid)( + printf( + cf"""{"event":"CheckRd","data":"${t1Probe.retire.bits}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""" + ) + ) + + // t1 lsu enq + when(t1Probe.lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${t1Probe.lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) + + // t1 vrf scoreboard + val vrfWriteScoreboard: Seq[Valid[UInt]] = Seq.tabulate(2 * generator.parameter.t1Parameter.chainingSize) { _ => + RegInit(0.U.asTypeOf(Valid(UInt(16.W)))) + } + vrfWriteScoreboard.foreach(scoreboard => dontTouch(scoreboard)) + val instructionValid = + 
(laneProbes.map(laneProbe => laneProbe.instructionValid ## laneProbe.instructionValid) :+ + lsuProbe.lsuInstructionValid :+ t1Probe.instructionValid).reduce(_ | _) + val scoreboardEnq = + Mux(t1Probe.instructionIssue, UIntToOH(t1Probe.issueTag), 0.U((2 * generator.parameter.t1Parameter.chainingSize).W)) + vrfWriteScoreboard.zipWithIndex.foreach { + case (scoreboard, tag) => + val writeEnq: UInt = VecInit( + // vrf write from lane + laneProbes.flatMap(laneProbe => + laneProbe.slots.map(slot => slot.writeTag === tag.U && slot.writeQueueEnq && slot.writeMask.orR) + ) ++ laneProbes.flatMap(laneProbe => + laneProbe.crossWriteProbe.map(cp => cp.bits.writeTag === tag.U && cp.valid && cp.bits.writeMask.orR) + ) ++ + // vrf write from lsu + lsuProbe.slots.map(slot => slot.dataInstruction === tag.U && slot.writeValid && slot.dataMask.orR) ++ + // vrf write from Sequencer + Some(t1Probe.writeQueueEnq.bits === tag.U && t1Probe.writeQueueEnq.valid && t1Probe.writeQueueEnqMask.orR) + ).asUInt + // always equal to array index + scoreboard.bits := scoreboard.bits + PopCount(writeEnq) + when(scoreboard.valid && !instructionValid(tag)) { + printf( + cf"""{"event":"VrfScoreboard","count":${scoreboard.bits},"issue_idx":${tag},"cycle":${simulationTime}}\n""" + ) + scoreboard.valid := false.B + } + when(scoreboardEnq(tag)) { + scoreboard.valid := true.B + assert(!scoreboard.valid) + scoreboard.bits := 0.U + } + } +} diff --git a/t1rocketemu/test_common/Cargo.toml b/t1rocketemu/test_common/Cargo.toml new file mode 100644 index 000000000..d5b3f32aa --- /dev/null +++ b/t1rocketemu/test_common/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "common" +version.workspace = true +edition = "2021" + +[dependencies] +spike_rs = { path = "../spike_rs" } +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } diff --git a/t1rocketemu/test_common/src/lib.rs b/t1rocketemu/test_common/src/lib.rs new file mode 100644 index 000000000..18f2a4d42 --- /dev/null +++ b/t1rocketemu/test_common/src/lib.rs @@ -0,0 +1,63 @@ +use anyhow::Result; +use clap::Parser; +use spike_rs::Spike; +use std::path::PathBuf; +use tracing::Level; +use tracing_subscriber::{EnvFilter, FmtSubscriber}; + +pub mod rtl_config; +pub mod spike_runner; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct CommonArgs { + /// Path to the ELF file + #[arg(long)] + pub elf_file: PathBuf, + + /// Path to the log file + #[arg(long)] + pub log_file: Option<PathBuf>, + + /// Log level: trace, debug, info, warn, error + #[arg(long, default_value = "info")] + pub log_level: String, + + /// vlen config + #[arg(long)] + pub vlen: u32, + + /// dlen config + #[arg(long)] + pub dlen: u32, + + /// ISA config + #[arg(long)] + pub set: String, +} + +pub static MEM_SIZE: usize = 1usize << 32; + +impl CommonArgs { + pub fn to_spike_c_handler(&self) -> Box<Spike> { + let lvl = "M"; + + Spike::new(&self.set, lvl, (self.dlen / 32) as usize, MEM_SIZE) + } + + pub fn setup_logger(&self) -> Result<()> { + // setup log + let log_level: Level = self.log_level.parse()?; + let global_logger = FmtSubscriber::builder() + .with_env_filter(EnvFilter::from_default_env()) + .with_max_level(log_level) + .without_time() + .with_target(false) + .with_ansi(true) + .compact() + .finish(); + tracing::subscriber::set_global_default(global_logger) + .expect("internal error: fail to setup log subscriber"); + Ok(()) + } +} diff --git a/t1rocketemu/test_common/src/rtl_config.rs
b/t1rocketemu/test_common/src/rtl_config.rs new file mode 100644 index 000000000..0daf72624 --- /dev/null +++ b/t1rocketemu/test_common/src/rtl_config.rs @@ -0,0 +1,20 @@ +pub struct RTLConfig { + pub vlen: u32, + pub dlen: u32, +} + +// TODO: read from json + +impl RTLConfig { + pub fn xlen(&self) -> u32 { + 32 // TODO: configurable + } + + pub fn vlen_in_bytes(&self) -> u32 { + self.vlen / 8 + } + + pub fn lane_num(&self) -> u32 { + self.dlen / self.xlen() + } +} diff --git a/t1rocketemu/test_common/src/spike_runner.rs b/t1rocketemu/test_common/src/spike_runner.rs new file mode 100644 index 000000000..3d8712708 --- /dev/null +++ b/t1rocketemu/test_common/src/spike_runner.rs @@ -0,0 +1,150 @@ +use std::collections::VecDeque; +use std::path::Path; +use tracing::debug; + +use spike_rs::spike_event::SpikeEvent; +use spike_rs::util::load_elf; +use spike_rs::Spike; + +use crate::CommonArgs; + +pub struct SpikeRunner { + spike: Box<Spike>, + + /// commit queue + /// in the spike thread, spike should detect whether this queue is full; if not + /// full, execute until a vector instruction, record the behavior of this + /// instruction, and send it to the commit queue. + /// Note: + /// - The event issued earliest is at the back of the queue + /// - The queue may contain at most one unissued event. If so, the unissued event must be at the + /// front of the queue, and it must be a fence + pub commit_queue: VecDeque<SpikeEvent>, + + /// config for v extension + pub vlen: u32, + pub dlen: u32, + + /// used to implement get_t() for the mcycle CSR update + pub cycle: u64, + + /// for mcycle csr update + pub spike_cycle: u64, + + pub do_log_vrf: bool, +} + +impl SpikeRunner { + pub fn new(args: &CommonArgs, do_log_vrf: bool) -> Self { + // load the elf file + // initialize spike + let mut spike = args.to_spike_c_handler(); + + let entry_addr = load_elf(&mut spike, Path::new(&args.elf_file)).unwrap(); + + // initialize processor + let proc = spike.get_proc(); + let state = proc.get_state(); + proc.reset(); + state.set_pc(entry_addr); + + SpikeRunner { + spike, + commit_queue: VecDeque::new(), + vlen: args.vlen, + dlen: args.dlen, + cycle: 0, + spike_cycle: 0, + do_log_vrf, + } + } + + pub fn load_elf(&mut self, fname: &Path) -> anyhow::Result<u64> { + load_elf(&mut *self.spike, fname) + } + + // just execute one instruction, for the non-difftest path + pub fn exec(&self) -> anyhow::Result<()> { + let spike = &self.spike; + let proc = spike.get_proc(); + let state = proc.get_state(); + + let new_pc = proc.func(); + + state.handle_pc(new_pc).unwrap(); + + let ret = state.exit(); + + if ret == 0 { + return Err(anyhow::anyhow!("simulation finished!")); + } + + Ok(()) + } + + // execute the spike processor for one instruction and record + // the spike event for difftest + pub fn spike_step(&mut self) -> SpikeEvent { + let spike = &self.spike; + let proc = self.spike.get_proc(); + let state = proc.get_state(); + + let mcycle = (self.cycle + self.spike_cycle) as usize; + state.set_mcycle(0); + + let mut event = SpikeEvent::new(spike, self.do_log_vrf); + state.clear(); + + let new_pc = if event.is_v() || event.is_exit() { + // inst is v / quit + debug!( + "SpikeStep: spike run vector insn ({}), mcycle={mcycle}", + event.describe_insn(), + ); + event.pre_log_arch_changes(spike, self.vlen).unwrap(); + let new_pc_ = proc.func(); + event.log_arch_changes(spike, self.vlen).unwrap(); + new_pc_ + } else { + // inst is scalar + debug!( + "SpikeStep: spike run scalar insn ({}), mcycle={mcycle}", + event.describe_insn(), + ); + let new_pc_ = proc.func(); +
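+ // the scalar path can still write memory: record the writes so difftest can later match them against RTL store events +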
event.log_mem_write(spike).unwrap(); + new_pc_ + }; + + state.handle_pc(new_pc).unwrap(); + + self.spike_cycle += 1; + + event + } + + pub fn find_reg_write(&mut self) -> SpikeEvent { + loop { + let se = self.spike_step(); + if se.is_scalar() && se.is_rd_written { + return se; + } + } + } + + pub fn find_v_se_to_issue(&mut self) -> SpikeEvent { + if !self.commit_queue.is_empty() && self.commit_queue.front().unwrap().is_vfence() { + // if the front (latest) se is a vfence, return the vfence + self.commit_queue.front().unwrap().clone() + } else { + // else, loop until find a se, and push the se to the front + loop { + let se = self.spike_step(); + if se.is_v() { + self.commit_queue.push_front(se.clone()); + break se.clone(); + } + } + } + } +} diff --git a/t1rocketemu/vcs.nix b/t1rocketemu/vcs.nix new file mode 100644 index 000000000..e69de29bb diff --git a/t1rocketemu/verilator.nix b/t1rocketemu/verilator.nix new file mode 100644 index 000000000..9fb575a35 --- /dev/null +++ b/t1rocketemu/verilator.nix @@ -0,0 +1,90 @@ +{ lib +, enableDebugging +, libspike +, libspike_interfaces +, callPackage +, elaborateConfig + +, rustPlatform + +, rust-analyzer +, rust-bindgen + +, verilator +, verilated +, cmake +, clang-tools +}: + +let + self = rustPlatform.buildRustPackage { + name = "verilator-emu" + (lib.optionalString verilated.enable-trace "-trace"); + + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./spike_rs + ./offline + ./online_dpi + ./online_drive + ./online_vcs + ./test_common + ./Cargo.lock + ./Cargo.toml + ]; + }; + + buildInputs = [ + libspike_interfaces + verilated + ]; + + nativeBuildInputs = [ + verilator + cmake + ]; + + buildFeatures = lib.optionals verilated.enable-trace [ "trace" ]; + + env = { + VERILATED_INC_DIR = "${verilated}/include"; + VERILATED_LIB_DIR = "${verilated}/lib"; + SPIKE_LIB_DIR = "${libspike}/lib"; + SPIKE_INTERFACES_LIB_DIR = "${libspike_interfaces}/lib"; + SPIKE_ISA_STRING = + "rv32gc" + + (builtins.concatStringsSep "_" elaborateConfig.parameter.extensions) + + "_Zvl${toString elaborateConfig.parameter.vLen}b"; + DESIGN_VLEN = elaborateConfig.parameter.vLen; + DESIGN_DLEN = elaborateConfig.parameter.dLen; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + dontUseCmakeConfigure = true; + + passthru = { + devShell = self.overrideAttrs (old: { + nativeBuildInputs = old.nativeBuildInputs ++ [ + rust-analyzer + rust-bindgen + clang-tools + ]; + }); + inherit libspike_interfaces; + + # enable debug info for difftest itself and libspike + withDebug = self.overrideAttrs (old: { + cargoBuildType = "debug"; + doCheck = false; + env = old.env // { + SPIKE_LIB_DIR = "${enableDebugging libspike}/lib"; + }; + dontStrip = true; + }); + }; + }; +in +self diff --git a/tests/asm/default.nix b/tests/asm/default.nix index debd78f5c..1fb5c6c19 100644 --- a/tests/asm/default.nix +++ b/tests/asm/default.nix @@ -14,7 +14,7 @@ let src = sourcePath; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = getTestRequiredFeatures sourcePath; isFp = lib.pathExists (lib.path.append sourcePath "isFp"); buildPhase = '' diff --git a/tests/asm/fpsmoke/features-required.json b/tests/asm/fpsmoke/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/asm/fpsmoke/features-required.json +++ b/tests/asm/fpsmoke/features-required.json @@ -1 +1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } diff --git a/tests/builder.nix b/tests/builder.nix index f250a8b13..e029699af 100644 --- a/tests/builder.nix +++ b/tests/builder.nix @@ 
-19,48 +19,57 @@ let # avoid adding jq to buildInputs, since it will make overriding buildInputs more error prone jqBin = "${jq}/bin/jq"; - caseDrv = stdenv.mkDerivation (self: rec { - # don't set name directory, since it will be suffixed with target triple - pname = "${casePrefix}.${caseName}"; - name = pname; + caseDrv = stdenv.mkDerivation (self: lib.recursiveUpdate + rec { + # don't set name directory, since it will be suffixed with target triple + pname = "${casePrefix}.${caseName}"; + name = pname; - CC = "${stdenv.targetPlatform.config}-cc"; + CC = "${stdenv.targetPlatform.config}-cc"; - NIX_CFLAGS_COMPILE = [ - "-mabi=ilp32f" - "-march=${rtlDesignMetadata.march}" - "-mno-relax" - "-static" - "-mcmodel=medany" - "-fvisibility=hidden" - "-fno-PIC" - "-g" - "-O3" - ]; + NIX_CFLAGS_COMPILE = + let + march = lib.pipe rtlDesignMetadata.march [ + (lib.splitString "_") + (map (ext: if ext == "zvbb" then "zvbb1" else ext)) + (lib.concatStringsSep "_") + ]; + in + [ + "-mabi=ilp32f" + "-march=${march}" + "-mno-relax" + "-static" + "-mcmodel=medany" + "-fvisibility=hidden" + "-fno-PIC" + "-g" + "-O3" + ] ++ lib.optionals (lib.elem "zvbb" (lib.splitString "_" rtlDesignMetadata.march)) [ "-menable-experimental-extensions" ]; - installPhase = '' - runHook preInstall + installPhase = '' + runHook preInstall - mkdir -p $out/bin - cp ${pname}.elf $out/bin + mkdir -p $out/bin + cp ${pname}.elf $out/bin - ${jqBin} --null-input \ - --arg name ${pname} \ - --arg type ${casePrefix} \ - --arg elfPath "$out/bin/${pname}.elf" \ - '{ "name": $name, "elf": { "path": $elfPath } }' \ - > $out/${pname}.json + ${jqBin} --null-input \ + --arg name ${pname} \ + --arg type ${casePrefix} \ + --arg elfPath "$out/bin/${pname}.elf" \ + '{ "name": $name, "elf": { "path": $elfPath } }' \ + > $out/${pname}.json - runHook postInstall - ''; + runHook postInstall + ''; - dontFixup = true; + dontFixup = true; - passthru = { - inherit rtlDesignMetadata; - emu-result = makeEmuResult caseDrv; - }; - - } // overrides); + passthru = { + inherit rtlDesignMetadata; + emu-result = makeEmuResult caseDrv; + }; + } + overrides); # end of recursiveUpdate in caseDrv diff --git a/tests/codegen/common.txt b/tests/codegen/common.txt index cb27e22f4..98fc1ae39 100644 --- a/tests/codegen/common.txt +++ b/tests/codegen/common.txt @@ -24,118 +24,118 @@ vdivu.vx vfirst.m vid.v viota.m -vl1re8.v vl1re16.v vl1re32.v -vl2re8.v +vl1re8.v vl2re16.v vl2re32.v -vl4re8.v +vl2re8.v vl4re16.v vl4re32.v -vl8re8.v +vl4re8.v vl8re16.v vl8re32.v -vle8.v -vle8ff.v +vl8re8.v vle16.v vle16ff.v vle32.v vle32ff.v +vle8.v +vle8ff.v vlm.v -vloxei8.v vloxei16.v vloxei32.v -vloxseg2ei8.v +vloxei8.v vloxseg2ei16.v vloxseg2ei32.v -vloxseg3ei8.v +vloxseg2ei8.v vloxseg3ei16.v vloxseg3ei32.v -vloxseg4ei8.v +vloxseg3ei8.v vloxseg4ei16.v vloxseg4ei32.v -vloxseg5ei8.v +vloxseg4ei8.v vloxseg5ei16.v vloxseg5ei32.v -vloxseg6ei8.v +vloxseg5ei8.v vloxseg6ei16.v vloxseg6ei32.v -vloxseg7ei8.v +vloxseg6ei8.v vloxseg7ei16.v vloxseg7ei32.v -vloxseg8ei8.v +vloxseg7ei8.v vloxseg8ei16.v vloxseg8ei32.v -vlse8.v +vloxseg8ei8.v vlse16.v vlse32.v -vlseg2e8.v +vlse8.v vlseg2e16.v vlseg2e32.v -vlseg3e8.v +vlseg2e8.v vlseg3e16.v vlseg3e32.v -vlseg4e8.v +vlseg3e8.v vlseg4e16.v vlseg4e32.v -vlseg5e8.v +vlseg4e8.v vlseg5e16.v vlseg5e32.v -vlseg6e8.v +vlseg5e8.v vlseg6e16.v vlseg6e32.v -vlseg7e8.v +vlseg6e8.v vlseg7e16.v vlseg7e32.v -vlseg8e8.v +vlseg7e8.v vlseg8e16.v vlseg8e32.v -vlsseg2e8.v +vlseg8e8.v vlsseg2e16.v vlsseg2e32.v -vlsseg3e8.v +vlsseg2e8.v vlsseg3e16.v vlsseg3e32.v -vlsseg4e8.v 
+vlsseg3e8.v vlsseg4e16.v vlsseg4e32.v -vlsseg5e8.v +vlsseg4e8.v vlsseg5e16.v vlsseg5e32.v -vlsseg6e8.v +vlsseg5e8.v vlsseg6e16.v vlsseg6e32.v -vlsseg7e8.v +vlsseg6e8.v vlsseg7e16.v vlsseg7e32.v -vlsseg8e8.v +vlsseg7e8.v vlsseg8e16.v vlsseg8e32.v -vluxei8.v +vlsseg8e8.v vluxei16.v vluxei32.v -vluxseg2ei8.v +vluxei8.v vluxseg2ei16.v vluxseg2ei32.v -vluxseg3ei8.v +vluxseg2ei8.v vluxseg3ei16.v vluxseg3ei32.v -vluxseg4ei8.v +vluxseg3ei8.v vluxseg4ei16.v vluxseg4ei32.v -vluxseg5ei8.v +vluxseg4ei8.v vluxseg5ei16.v vluxseg5ei32.v -vluxseg6ei8.v +vluxseg5ei8.v vluxseg6ei16.v vluxseg6ei32.v -vluxseg7ei8.v +vluxseg6ei8.v vluxseg7ei16.v vluxseg7ei32.v -vluxseg8ei8.v +vluxseg7ei8.v vluxseg8ei16.v vluxseg8ei32.v +vluxseg8ei8.v vmacc.vv vmacc.vx vmadc.vi @@ -260,9 +260,9 @@ vsaddu.vv vsaddu.vx vsbc.vvm vsbc.vxm -vse8.v vse16.v vse32.v +vse8.v vsetivli vsetvl vsetvli @@ -280,117 +280,117 @@ vsll.vx vsm.v vsmul.vv vsmul.vx -vsoxei8.v vsoxei16.v vsoxei32.v -vsoxseg2ei8.v +vsoxei8.v vsoxseg2ei16.v vsoxseg2ei32.v -vsoxseg3ei8.v +vsoxseg2ei8.v vsoxseg3ei16.v vsoxseg3ei32.v -vsoxseg4ei8.v +vsoxseg3ei8.v vsoxseg4ei16.v vsoxseg4ei32.v -vsoxseg5ei8.v +vsoxseg4ei8.v vsoxseg5ei16.v vsoxseg5ei32.v -vsoxseg6ei8.v +vsoxseg5ei8.v vsoxseg6ei16.v vsoxseg6ei32.v -vsoxseg7ei8.v +vsoxseg6ei8.v vsoxseg7ei16.v vsoxseg7ei32.v -vsoxseg8ei8.v +vsoxseg7ei8.v vsoxseg8ei16.v vsoxseg8ei32.v +vsoxseg8ei8.v vsra.vi vsra.vv vsra.vx vsrl.vi vsrl.vv vsrl.vx -vsse8.v vsse16.v vsse32.v -vsseg2e8.v +vsse8.v vsseg2e16.v vsseg2e32.v -vsseg3e8.v +vsseg2e8.v vsseg3e16.v vsseg3e32.v -vsseg4e8.v +vsseg3e8.v vsseg4e16.v vsseg4e32.v -vsseg5e8.v +vsseg4e8.v vsseg5e16.v vsseg5e32.v -vsseg6e8.v +vsseg5e8.v vsseg6e16.v vsseg6e32.v -vsseg7e8.v +vsseg6e8.v vsseg7e16.v vsseg7e32.v -vsseg8e8.v +vsseg7e8.v vsseg8e16.v vsseg8e32.v +vsseg8e8.v vssra.vi vssra.vv vssra.vx vssrl.vi vssrl.vv vssrl.vx -vssseg2e8.v vssseg2e16.v vssseg2e32.v -vssseg3e8.v +vssseg2e8.v vssseg3e16.v vssseg3e32.v -vssseg4e8.v +vssseg3e8.v vssseg4e16.v vssseg4e32.v -vssseg5e8.v +vssseg4e8.v vssseg5e16.v vssseg5e32.v -vssseg6e8.v +vssseg5e8.v vssseg6e16.v vssseg6e32.v -vssseg7e8.v +vssseg6e8.v vssseg7e16.v vssseg7e32.v -vssseg8e8.v +vssseg7e8.v vssseg8e16.v vssseg8e32.v +vssseg8e8.v vssub.vv vssub.vx vssubu.vv vssubu.vx vsub.vv vsub.vx -vsuxei8.v vsuxei16.v vsuxei32.v -vsuxseg2ei8.v +vsuxei8.v vsuxseg2ei16.v vsuxseg2ei32.v -vsuxseg3ei8.v +vsuxseg2ei8.v vsuxseg3ei16.v vsuxseg3ei32.v -vsuxseg4ei8.v +vsuxseg3ei8.v vsuxseg4ei16.v vsuxseg4ei32.v -vsuxseg5ei8.v +vsuxseg4ei8.v vsuxseg5ei16.v vsuxseg5ei32.v -vsuxseg6ei8.v +vsuxseg5ei8.v vsuxseg6ei16.v vsuxseg6ei32.v -vsuxseg7ei8.v +vsuxseg6ei8.v vsuxseg7ei16.v vsuxseg7ei32.v -vsuxseg8ei8.v +vsuxseg7ei8.v vsuxseg8ei16.v vsuxseg8ei32.v +vsuxseg8ei8.v vwadd.vv vwadd.vx vwadd.wv diff --git a/tests/codegen/default.nix b/tests/codegen/default.nix index fd8edb612..481a54e30 100644 --- a/tests/codegen/default.nix +++ b/tests/codegen/default.nix @@ -4,21 +4,12 @@ , makeBuilder # Instead of testing feature is supported on TOP level, # codegen case are always generated with supported code. -, currentFeatures +, featuresSet }: let builder = makeBuilder { casePrefix = "codegen"; }; makeCaseName = lib.replaceStrings [ "." 
] [ "_" ]; - extraValueFromFeatures = pattern: - lib.last - (lib.splitString ":" - (lib.head - (lib.filter - (lib.hasPrefix pattern) - currentFeatures))); - vlen = extraValueFromFeatures "vlen"; - xlen = extraValueFromFeatures "xlen"; build = { rawCaseName, extra }: builder @@ -36,8 +27,8 @@ let runHook preBuild ${rvv-codegen}/bin/single \ - -VLEN "${vlen}" \ - -XLEN "${xlen}" \ + -VLEN "${featuresSet.vlen}" \ + -XLEN "${featuresSet.xlen}" \ -repeat 16 \ -testfloat3level 2 \ -configfile ${rvv-codegen}/configs/${rawCaseName}.toml \ @@ -71,14 +62,13 @@ let ) rawCaseNames)); - commonTests = buildTestsFromFile ./common.txt { featuresRequired = [ ]; }; - fpTests = buildTestsFromFile ./fp.txt { featuresRequired = [ "zve32f" ]; }; - zvbbTests = buildTestsFromFile ./zvbb.txt { featuresRequired = [ "zvbb" ]; }; - hasFeature = feat: lib.any (f: feat == f) currentFeatures; + commonTests = buildTestsFromFile ./common.txt { passthru.featuresRequired = { extensions = [ ]; }; }; + fpTests = buildTestsFromFile ./fp.txt { passthru.featuresRequired = { extensions = [ "zve32f" ]; }; }; + zvbbTests = buildTestsFromFile ./zvbb.txt { passthru.featuresRequired = { extensions = [ "zvbb" ]; }; }; in lib.recurseIntoAttrs ( commonTests // - lib.optionalAttrs (hasFeature "zve32f") fpTests // - lib.optionalAttrs (hasFeature "zvbb") zvbbTests + lib.optionalAttrs (lib.elem "zve32f" featuresSet.extensions) fpTests // + lib.optionalAttrs (lib.elem "zvbb" featuresSet.extensions) zvbbTests ) diff --git a/tests/codegen/zvbb.txt b/tests/codegen/zvbb.txt index 77ed67621..d109f5570 100644 --- a/tests/codegen/zvbb.txt +++ b/tests/codegen/zvbb.txt @@ -1,7 +1,7 @@ vandn.vv vandn.vx vbrev.v -vbreav8.v +vbrev8.v vclz.v vcpop.v vctz.v diff --git a/tests/default.nix b/tests/default.nix index 8a607d59e..a1711c540 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -1,35 +1,85 @@ { lib -, configName -, rtlDesignMetadata , newScope , rv32-stdenv , runCommand -, verilator-emu -, verilator-emu-trace -, vcs-emu -, vcs-emu-trace + +, configName +, rtlDesignMetadata + +, t1rocket-emu ? null +, t1rocket-emu-trace ? null + +, verilator-emu ? null +, verilator-emu-trace ? null + +, vcs-emu ? null +, vcs-emu-trace ? null }: let - hasExt = cmp: lib.any (ext: cmp == (lib.toLower ext)) rtlDesignMetadata.extensions; - - # Add an extra abstract layer between test case and RTL design, so that we can have clean and organized way - # for developer to specify their required features without the need to parse ISA string themselves. - currentFeatures = [ - "vlen:${rtlDesignMetadata.vlen}" - "dlen:${rtlDesignMetadata.dlen}" - "xlen:${if (lib.hasPrefix "rv32" rtlDesignMetadata.march) then "32" else "64"}" - ] - ++ lib.optionals (hasExt "zve32f") [ "zve32f" ] - ++ lib.optionals (hasExt "zvbb") [ "zvbb" ]; + getVLen = ext: + let + val = builtins.tryEval + (lib.toInt + (lib.removeSuffix "b" + (lib.removePrefix "zvl" + (lib.toLower ext)))); + in + if val.success then + val.value + else + throw "Invalid vlen extension `${ext}` specify, expect Zvl{N}b"; + + featuresSet = { + extensions = lib.splitString "_" rtlDesignMetadata.march; + xlen = if (lib.hasPrefix "rv32" rtlDesignMetadata.march) then 32 else 64; + vlen = getVLen (lib.last + (lib.filter + (x: lib.hasPrefix "zvl" + (lib.toLower x)))); + inherit (rtlDesignMetadata) dlen; + }; # isSubSetOf m n: n is subset of m isSubsetOf = m: n: lib.all (x: lib.elem x m) n; + # Return true if attribute in first argument exists in second argument, and the value is also equal. 
+ # + # Example: + # + # hasIntersect { } { a = [1 2 3]; b = 4; } # true + # hasIntersect { a = [1]; } { a = [1 2 3]; b = 4; } # true + # hasIntersect { a = [1]; b = 4; } { a = [1 2 3]; b = 4; } # true + # hasIntersect { a = [4]; } { a = [1 2 3]; b = 4; } # false + # hasIntersect { c = 4; } { a = [1 2 3]; b = 4; } # false + # + # hasIntersect :: AttrSet -> AttrSet -> Bool + hasIntersect = ma: na: with builtins; let + keysMa = attrNames ma; + keysNa = attrNames na; + intersectKeys = lib.filter (n: lib.elem n keysNa) (attrNames ma); + intersectValEquality = map + (key: + if typeOf (ma.${key}) == "list" then + isSubsetOf na.${key} ma.${key} + else ma.${key} == na.${key}) + intersectKeys; + in + (length keysMa == 0) || + ((length intersectKeys > 0) && all (isEqual: isEqual) intersectValEquality); + scope = lib.recurseIntoAttrs (lib.makeScope newScope (casesSelf: { recurseForDerivations = true; - inherit verilator-emu verilator-emu-trace vcs-emu vcs-emu-trace rtlDesignMetadata currentFeatures; + inherit + verilator-emu + verilator-emu-trace + vcs-emu + vcs-emu-trace + t1rocket-emu + t1rocket-emu-trace + rtlDesignMetadata + featuresSet; makeEmuResult = casesSelf.callPackage ./make-emu-result.nix { }; @@ -46,7 +96,12 @@ let in if lib.pathExists extraFeatures then builtins.fromJSON (lib.fileContents extraFeatures) - else [ ]; + else { }; + + filterByFeatures = caseName: caseDrv: + assert lib.assertMsg (caseDrv ? featuresRequired) "${caseName} doesn't have features specified"; + # Test the case required extensions is supported by rtl design + hasIntersect caseDrv.featuresRequired featuresSet; findAndBuild = dir: build: lib.recurseIntoAttrs (lib.pipe (builtins.readDir dir) [ @@ -63,10 +118,7 @@ let inherit caseName sourcePath; }) ) - (lib.filterAttrs (caseName: caseDrv: - assert lib.assertMsg (caseDrv ? featuresRequired) "${caseName} doesn't have features specified"; - # Test the case required extensions is supported by rtl design - isSubsetOf currentFeatures caseDrv.featuresRequired)) + (lib.filterAttrs casesSelf.filterByFeatures) ]); t1main = ./t1_main.S; linkerScript = ./t1.ld; @@ -79,11 +131,12 @@ let perf = casesSelf.callPackage ./perf { }; codegen = casesSelf.callPackage ./codegen { }; rvv_bench = casesSelf.callPackage ./rvv_bench { }; + pytorch = casesSelf.callPackage ./pytorch { }; })); # remove non-case attributes in scope scopeStripped = { - inherit (scope) mlir intrinsic asm perf codegen rvv_bench; + inherit (scope) mlir intrinsic asm perf codegen rvv_bench pytorch; }; # This derivation is for internal CI use only. 
@@ -135,7 +188,7 @@ let in runCommand "catch-${configName}-all-vcs-emu-result-for-ci" { } script; - all = + _all = let allCases = lib.filter lib.isDerivation @@ -155,4 +208,4 @@ let { } script; in -lib.recurseIntoAttrs (scopeStripped // { inherit all _allEmuResult _allVCSEmuResult; }) +lib.recurseIntoAttrs (scopeStripped // { inherit _all _allEmuResult _allVCSEmuResult; }) diff --git a/tests/emurt/emurt.c b/tests/emurt/emurt.c index f52a57d36..77ebc6fad 100644 --- a/tests/emurt/emurt.c +++ b/tests/emurt/emurt.c @@ -54,7 +54,9 @@ int _write(int file, char* ptr, int len) { } void _exit(int code) { - __asm__("csrwi 0x7cc, 0"); + __asm__("li x1, 0x40000000"); + __asm__("li x2, 0xdeadbeef"); + __asm__("sw x2, 0(x1)"); __builtin_unreachable(); } diff --git a/tests/intrinsic/default.nix b/tests/intrinsic/default.nix index 3dadca131..146d8efd0 100644 --- a/tests/intrinsic/default.nix +++ b/tests/intrinsic/default.nix @@ -14,7 +14,7 @@ let src = sourcePath; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = getTestRequiredFeatures sourcePath; buildPhase = '' runHook preBuild diff --git a/tests/intrinsic/linear_normalization/features-required.json b/tests/intrinsic/linear_normalization/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/intrinsic/linear_normalization/features-required.json +++ b/tests/intrinsic/linear_normalization/features-required.json @@ -1 +1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } diff --git a/tests/intrinsic/softmax/features-required.json b/tests/intrinsic/softmax/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/intrinsic/softmax/features-required.json +++ b/tests/intrinsic/softmax/features-required.json @@ -1 +1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } diff --git a/tests/make-emu-result.nix b/tests/make-emu-result.nix index a8540f806..9b15191b3 100644 --- a/tests/make-emu-result.nix +++ b/tests/make-emu-result.nix @@ -3,26 +3,30 @@ , stdenvNoCC , jq , zstd -, verilator-emu -, verilator-emu-trace -, vcs-emu -, vcs-emu-trace -, elaborateConfigJson + +, t1rocket-emu ? null +, t1rocket-emu-trace ? null + +, verilator-emu ? null +, verilator-emu-trace ? null + +, vcs-emu ? null +, vcs-emu-trace ? null }: # makeEmuResult arg testCase: -let - self = stdenvNoCC.mkDerivation { +rec { + verilator-check = stdenvNoCC.mkDerivation { name = "${testCase.pname}-emu-result"; nativeBuildInputs = [ zstd jq ]; dontUnpack = true; - difftestDriver = "${verilator-emu}/bin/online_drive"; - difftestArgs = [ + emuDriver = "${verilator-emu}/bin/online_drive"; + emuDriverArgs = [ "--elf-file" "${testCase}/bin/${testCase.pname}.elf" "--log-file" @@ -37,9 +41,15 @@ let mkdir -p "$out" - echo "[nix] Running test case ${testCase.pname} with args $difftestArgs" + echo "[nix] Running test case ${testCase.pname} with args $emuDriverArgs" - RUST_BACKTRACE=full "$difftestDriver" $difftestArgs 2> "$rtlEventOutPath" + export RUST_BACKTRACE=full + if ! "$emuDriver" $emuDriverArgs 2> "$rtlEventOutPath"; then + echo "[nix] online driver run failed" + cat $rtlEventOutPath + echo "[nix] Rerun with command: '$emuDriver $emuDriverArgs'" + exit 1 + fi echo "[nix] online driver done" @@ -63,6 +73,17 @@ let exit 1 fi + if [ -z "$postCheck" ]; then + set +e + mkdir -p "$out" + "${verilator-emu}/bin/offline" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --log-file $rtlEventOutPath \ + --log-level ERROR &> $out/offline-check-journal + printf "$?" 
> $out/offline-check-status + set -e + fi + runHook postCheck ''; @@ -79,112 +100,98 @@ let runHook postInstall ''; + }; - passthru.with-trace = self.overrideAttrs (old: { - name = old.name + "-with-trace"; - emuDriver = "${verilator-emu-trace}/bin/online_drive"; - emuDriverArgs = old.emuDriverArgs ++ [ "--wave-path" "${placeholder "out"}/wave.fst" ]; - postCheck = '' - if [ ! -r "$out/wave.fst" ]; then - echo -e "[nix] \033[0;31mInternal Error\033[0m: waveform not found in output" - exit 1 - fi - ''; - }); - - passthru.with-offline = self.overrideAttrs (old: { - name = old.name + "-with-offline"; - preInstall = '' - set +e - "${verilator-emu}/bin/offline" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --log-file $rtlEventOutPath \ - --log-level ERROR &> $out/offline-check-journal - printf "$?" > $out/offline-check-status - set -e - ''; - }); + verilator-check-trace = lib.overrideDerivation verilator-check (old: { + name = old.name + "-with-trace"; + emuDriver = "${verilator-emu-trace}/bin/online_drive"; + emuDriverArgs = old.emuDriverArgs or [ ] ++ [ "--wave-path" "${placeholder "out"}/wave.fst" ]; + postCheck = '' + if [ ! -r "$out/wave.fst" ]; then + echo -e "[nix] \033[0;31mInternal Error\033[0m: waveform not found in output" + exit 1 + fi + ''; + }); - passthru.with-vcs = self.overrideAttrs (old: { - name = old.name + "-with-vcs"; - __noChroot = true; + vcs-check = lib.overrideDerivation verilator-check (old: { + name = old.name + "-with-vcs"; + __noChroot = true; + dontPatchELF = true; - buildPhase = '' - runHook preBuild + buildPhase = '' + runHook preBuild - mkdir -p "$out" - echo "[nix] Running VCS for ${testCase.pname}" + mkdir -p "$out" + echo "[nix] Running VCS for ${testCase.pname}" - RUST_BACKTRACE=full "${vcs-emu}/bin/t1-vcs-simulator" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - 1> /dev/null \ - 2> $rtlEventOutPath + RUST_BACKTRACE=full "${vcs-emu}/bin/t1-vcs-simulator" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + 1> /dev/null \ + 2> $rtlEventOutPath - echo "[nix] VCS emu done" + echo "[nix] VCS emu done" - runHook postBuild - ''; + runHook postBuild + ''; - postCheck = '' - set +e + postCheck = '' + set +e - "${verilator-emu}/bin/offline" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --log-file $rtlEventOutPath \ - --log-level ERROR &> $out/offline-check-journal - printf "$?" > $out/offline-check-status + "${vcs-emu}/bin/offline" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --log-file $rtlEventOutPath \ + --log-level ERROR &> $out/offline-check-journal + printf "$?" > $out/offline-check-status - set -e - ''; - }); + set -e + ''; + }); - # TODO: We should write some framework like NixOS module to overlay these attribute, instead - # of override attribute one by one. 
- passthru.with-vcs-trace = self.overrideAttrs (old: { - name = old.name + "-with-vcs-trace"; - __noChroot = true; - buildPhase = '' - runHook preBuild + vcs-trace-check = lib.overrideDerivation verilator-check (old: { + name = old.name + "-with-vcs-trace"; + __noChroot = true; + dontPatchELF = true; + buildPhase = '' + runHook preBuild - mkdir -p "$out" - echo "[nix] Running VCS(TRACE) for ${testCase.pname}" + mkdir -p "$out" + echo "[nix] Running VCS(TRACE) for ${testCase.pname}" - RUST_BACKTRACE=full "${vcs-emu-trace}/bin/t1-vcs-simulator" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --wave-path ${testCase.pname}.fsdb \ - 1> /dev/null \ - 2> $rtlEventOutPath + RUST_BACKTRACE=full "${vcs-emu-trace}/bin/t1-vcs-simulator" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --wave-path ${testCase.pname}.fsdb \ + 1> /dev/null \ + 2> $rtlEventOutPath - echo "[nix] VCS emu done" + echo "[nix] VCS emu done" - runHook postBuild - ''; + runHook postBuild + ''; - postCheck = '' - set +e + postCheck = '' + set +e + + echo "[nix] Checking VCS event log" + "${vcs-emu-trace}/bin/offline" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --log-file $rtlEventOutPath \ + --log-level ERROR &> $out/offline-check-journal + printf "$?" > $out/offline-check-status + if [ "$(cat $out/offline-check-status)" == "0" ]; then + echo "[nix] VCS difftest PASS" + else + echo "[nix] VCS difftest FAIL" + fi - echo "[nix] Checking VCS event log" - "${verilator-emu}/bin/offline" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --log-file $rtlEventOutPath \ - --log-level ERROR &> $out/offline-check-journal - printf "$?" > $out/offline-check-status - if [ "$(cat $out/offline-check-status)" == "0" ]; then - echo "[nix] VCS difftest PASS" - else - echo "[nix] VCS difftest FAIL" - fi + set -e + ''; - set -e - - postInstall = '' - # VCS have weird behavior on file creation, it will report read-only filesystem on our output, - # while other tools can mutate file system correctly. - cp ${testCase.pname}.fsdb "$out" - cp -r ${vcs-emu-trace}/lib/t1-vcs-simulator.daidir "$out" - ''; - }); - }; -in -self + postInstall = '' + # VCS has weird behavior on file creation: it reports a read-only filesystem on our output, + # while other tools can mutate the file system correctly. + cp ${testCase.pname}.fsdb "$out" + cp -r ${vcs-emu-trace}/lib/t1-vcs-simulator.daidir "$out" + ''; + }); +} diff --git a/tests/mlir/default.nix b/tests/mlir/default.nix index 96ba1218f..486506932 100644 --- a/tests/mlir/default.nix +++ b/tests/mlir/default.nix @@ -14,7 +14,7 @@ let src = sourcePath; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = getTestRequiredFeatures sourcePath; nativeBuildInputs = [ buddy-mlir ]; diff --git a/tests/perf/llama/default.nix b/tests/perf/llama/default.nix index 74d111dd7..c72efb72d 100644 --- a/tests/perf/llama/default.nix +++ b/tests/perf/llama/default.nix @@ -21,7 +21,9 @@ let in build { - featuresRequired = [ "zve32f" ]; + passthru.featuresRequired = { + extensions = [ "zve32f" ]; + }; caseName = "llama"; diff --git a/tests/pytorch/README.md b/tests/pytorch/README.md new file mode 100644 index 000000000..cf638e951 --- /dev/null +++ b/tests/pytorch/README.md @@ -0,0 +1,140 @@ +## How to add tests + +To create a new PyTorch test, follow the instructions below.
+ +Assuming the new PyTorch test is a project named `demo`, let's create the test skeleton: + +```bash +cd tests/pytorch +mkdir -p demo +cd demo +touch demo.c demo.py config.nix +``` + +Developers should put their PyTorch implementation into the ".py" file. +For each PyTorch test, developers must write the MLIR model to the "forward.mlir" file. + +```python +# demo.py +#... +with open("forward.mlir", "w") as mlir_module: + print(graph._imported_module, file = mlir_module) +``` + +For each PyTorch test, developers should call the MLIR model from the ".c" file. +In our case, here is an example "demo.c" file: + +```c +// 1. Include the MemRef wrapper +#include "memref.h" + +// 2. Create the corresponding MemRef struct with data type `float` and one dimension. +NEW_MEMREF(float, 1); + +// 3. Declare the MLIR model interface +extern void _mlir_ciface_forward(struct MemRef_float_dim1 *output, + struct MemRef_float_dim1 *arg1, + struct MemRef_float_dim1 *arg2); + +// 4. Create an example data array. The ".vdata" attribute helps the emulator load the data into the correct memory. +__attribute((section(".vdata"))) float input_float_0[512] = {1, 2, 3}; +struct MemRef_float_dim1 input1 = { + .allocatedPtr = input_float_0, + .alignedPtr = input_float_0, + .offset = 0, + .sizes = {512}, + .strides = {1}, +}; + +// 5. Declare the main entry. In t1, every test's entry point should be `int test()` instead of main(). +int test() { + _mlir_ciface_forward(&output, &input1, &input2); + return 0; +} +``` + +After the PyTorch model and the C entry are created, developers should declare a "config.nix" +file to tell our build system how to find and build the test case: + +```nix +{ + # Tell our build system to include the memref.h header. + # Developers can add extra headers here. + includes = [ + ../memref.h + ]; + + # Tell the build system to run buddy-opt in three phases, with the arguments to use in each phase + buddyOptArgs = [ + [ + "--pass-pipeline" + "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" + ] + [ + "--pass-pipeline" + "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers))" + ] + [ + "--lower-affine" + "--convert-math-to-llvm" + "--convert-math-to-libm" + "--convert-scf-to-cf" + "--convert-arith-to-llvm" + "--expand-strided-metadata" + "--finalize-memref-to-llvm" + "--lower-vector-exp" + "--lower-rvv=rv32" + "--convert-vector-to-llvm" + "--convert-func-to-llvm" + "--reconcile-unrealized-casts" + ] + ]; +} +``` + +Our build system accepts the following layout for the "config.nix" file: + +```text +Set { + buddyOptArgs: Array<Array<String>>, + + includes: Optional<Array<Path>>, + pythonArgs: Optional<Array<String>>, + buddyTranslateArgs: Optional<Array<String>>, + buddyLLCArgs: Optional<Array<String>>, +} +``` + +After the project has been implemented, developers can run the commands below to build and test the ELF: + +```bash +git add . 
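+# note: nix flake builds only see files tracked by git, hence the `git add`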
+nix build '.#t1.blastoise.ip.cases.pytorch.demo' -L +ls ./result/bin/pytorch-demo.elf + +# To start the emulator and get a waveform, run: +nix build '.#t1.blastoise.ip.cases.pytorch.demo.emu-result.with-trace' -L +``` + +## FAQ + +* How to debug the PyTorch code + +```bash +nix run '.#buddy-mlir-pyenv' -- demo.py +``` + +* How to run buddy compiler tools manually + +```bash +nix develop '.#t1.blastoise.ip.cases.pytorch.demo' -L +cd $(mktemp -d -t 'pytorch-debug-XXX') +pwd + +# Unpack sources +unpackPhase +# Check commands: +echo -e "$buildPhase" +# Run build +bash -c "$buildPhase" ``` diff --git a/tests/pytorch/default.nix b/tests/pytorch/default.nix new file mode 100644 index 000000000..719ac58cd --- /dev/null +++ b/tests/pytorch/default.nix @@ -0,0 +1,101 @@ +{ lib +, linkerScript +, buddy-mlir +, buddy-mlir-pyenv +, makeBuilder +, findAndBuild +, getTestRequiredFeatures +, t1main +}: + +let + + builder = makeBuilder { casePrefix = "mlir"; }; + build = { caseName, sourcePath }: + let + buddyBuildConfig = import (sourcePath + "/config.nix"); + defaultBuddyTranslateArgs = [ "--buddy-to-llvmir" ]; + defaultBuddyLLCArgs = [ + "-mtriple=riscv32" + "-target-abi=ilp32f" + "-mattr=+m,+f,+zve32f" + "-riscv-v-vector-bits-min=128" + ]; + in + builder rec { + inherit caseName; + + src = sourcePath; + + passthru.featuresRequired = getTestRequiredFeatures sourcePath; + + nativeBuildInputs = [ buddy-mlir-pyenv buddy-mlir ]; + + pythonArgs = buddyBuildConfig.pythonArgs or [ ]; + buddyTranslateArgs = buddyBuildConfig.buddyTranslateArgs or defaultBuddyTranslateArgs; + buddyLLCArgs = buddyBuildConfig.buddyLLCArgs or defaultBuddyLLCArgs; + buddyIncludes = buddyBuildConfig.includes or [ ]; + + postUnpack = '' + buddyIncludeDir="." + if [ "x$buddyIncludes" != "x" ]; then + mkdir -p buddyInclude + _buddyHeaderArray=( $buddyIncludes ) + for h in "''${_buddyHeaderArray[@]}"; do + cp -v "$h" buddyInclude/"$(stripHash $h)" + done + + buddyIncludeDir=$PWD/buddyInclude + fi + ''; + + buildPhase = '' + runHook preBuild + + echo "Running python with args $pythonArgs" + python $pythonArgs ${caseName}.py + + # Generate multiple buddy-opt calls; each reads its input from the previous stage + # For example, for buddyOptArgs = [ [ "--arg-a" ], [ "--arg-b" ], [ "--arg-c" ] ] + # This will generate + # + # echo "..." + # buddy-opt forward.mlir --arg-a -o forward-1.mlir + # echo "..." + # buddy-opt forward-1.mlir --arg-b -o forward-2.mlir + # echo "..." + # buddy-opt forward-2.mlir --arg-c -o forward-3.mlir + # + ${lib.concatStringsSep "\n" ( + lib.imap0 + (idx: args: '' + echo "Running buddy-opt with args ${lib.escapeShellArgs args}" + buddy-opt \ + forward${if idx == 0 then "" else "-${toString idx}"}.mlir \ + ${lib.escapeShellArgs args} \ + -o forward-${toString (idx+1)}.mlir + '') + buddyBuildConfig.buddyOptArgs + )} + + # Pick up the last optimized MLIR file + echo "Running buddy-translate with args $buddyTranslateArgs" + buddy-translate forward-${with builtins; toString (length buddyBuildConfig.buddyOptArgs)}.mlir \ + $buddyTranslateArgs -o forward.ll + + echo "Running buddy-llc with args $buddyLLCArgs" + buddy-llc forward.ll $buddyLLCArgs --filetype=obj -o forward.o + + echo "Using include dir $buddyIncludeDir" + $CC -T${linkerScript} \ + -I$buddyIncludeDir \ + ${caseName}.c forward.o ${t1main} \ + -o $pname.elf + + runHook postBuild + ''; + + meta.description = "testcase '${caseName}', written in MLIR"; + }; +in +findAndBuild ./. 
diff --git a/tests/pytorch/demo/config.nix b/tests/pytorch/demo/config.nix
new file mode 100644
index 000000000..d00359f79
--- /dev/null
+++ b/tests/pytorch/demo/config.nix
@@ -0,0 +1,30 @@
+{
+  includes = [
+    ../memref.h
+  ];
+
+  buddyOptArgs = [
+    [
+      "--pass-pipeline"
+      "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)"
+    ]
+    [
+      "--pass-pipeline"
+      "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers))"
+    ]
+    [
+      "--lower-affine"
+      "--convert-math-to-llvm"
+      "--convert-math-to-libm"
+      "--convert-scf-to-cf"
+      "--convert-arith-to-llvm"
+      "--expand-strided-metadata"
+      "--finalize-memref-to-llvm"
+      "--lower-vector-exp"
+      "--lower-rvv=rv32"
+      "--convert-vector-to-llvm"
+      "--convert-func-to-llvm"
+      "--reconcile-unrealized-casts"
+    ]
+  ];
+}
diff --git a/tests/pytorch/demo/demo.c b/tests/pytorch/demo/demo.c
new file mode 100644
index 000000000..6b8b971bb
--- /dev/null
+++ b/tests/pytorch/demo/demo.c
@@ -0,0 +1,39 @@
+#include "memref.h"
+
+NEW_MEMREF(float, 1);
+
+extern void _mlir_ciface_forward(struct MemRef_float_dim1 *output,
+                                 struct MemRef_float_dim1 *arg1,
+                                 struct MemRef_float_dim1 *arg2);
+
+__attribute((section(".vdata"))) float input_float_0[512] = {1, 2, 3};
+struct MemRef_float_dim1 input1 = {
+    .allocatedPtr = input_float_0,
+    .alignedPtr = input_float_0,
+    .offset = 0,
+    .sizes = {512},
+    .strides = {1},
+};
+
+__attribute((section(".vdata"))) float input_float_1[512] = {4, 5, 6};
+struct MemRef_float_dim1 input2 = {
+    .allocatedPtr = input_float_1,
+    .alignedPtr = input_float_1,
+    .offset = 0,
+    .sizes = {512},
+    .strides = {1},
+};
+
+__attribute((section(".vdata"))) float output_float_0[512];
+struct MemRef_float_dim1 output = {
+    .allocatedPtr = output_float_0,
+    .alignedPtr = output_float_0,
+    .offset = 0,
+    .sizes = {512},
+    .strides = {1},
+};
+
+int test() {
+  _mlir_ciface_forward(&output, &input1, &input2);
+  return 0;
+}
diff --git a/tests/pytorch/demo/demo.py b/tests/pytorch/demo/demo.py
new file mode 100644
index 000000000..650227eca
--- /dev/null
+++ b/tests/pytorch/demo/demo.py
@@ -0,0 +1,30 @@
+import torch
+import torch._dynamo as dynamo
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import tosa
+
+# Define the target function or model.
+def foo(x, y):
+    return x * y + x
+
+# Define the input data.
+float32_in1 = torch.randn(512).to(torch.float32)
+float32_in2 = torch.randn(512).to(torch.float32)
+
+# Initialize the dynamo compiler.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+# Pass the function and input data to the dynamo compiler's importer. The
+# importer first builds a graph, which is then lowered to a top-level IR
+# (tosa, linalg, etc.); finally we obtain the generated module and weight
+# parameters.
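+# Note: importer returns a list of traced graphs (dynamo may split a function
+# into several subgraphs); this simple function traces to a single graph.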
+graphs = dynamo_compiler.importer(foo, *(float32_in1, float32_in2)) +graph = graphs[0] +graph.lower_to_top_level_ir() + +with open("forward.mlir", "w") as mlir_module: + print(graph._imported_module, file = mlir_module) diff --git a/tests/pytorch/memref.h b/tests/pytorch/memref.h new file mode 100644 index 000000000..c468af15f --- /dev/null +++ b/tests/pytorch/memref.h @@ -0,0 +1,28 @@ +#ifndef MEMREF_H +#define MEMREF_H + +// Generate a new struct with T-type, N-dimension memref with name +// MemRef_T_dimN. +// +// Example: +// +// NEW_MEMREF(float, 2); +// // Equals to +// struct MemRef_float_dim2 { +// float *allocatedPtr; +// float *alignedPtr; +// int offset; +// int sizes[2]; +// int strides[2]; +// }; +// +#define NEW_MEMREF(T, N) \ + struct MemRef_##T##_dim##N { \ + T *allocatedPtr; \ + T *alignedPtr; \ + int offset; \ + int sizes[N]; \ + int strides[N]; \ + } + +#endif diff --git a/tests/riscv-test-env/LICENSE b/tests/riscv-test-env/LICENSE new file mode 100644 index 000000000..48fe522ac --- /dev/null +++ b/tests/riscv-test-env/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2012-2015, The Regents of the University of California (Regents). +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. Neither the name of the Regents nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING +OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS +BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED +HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE +MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
diff --git a/tests/riscv-test-env/encoding.h b/tests/riscv-test-env/encoding.h new file mode 100644 index 000000000..01889d1a9 --- /dev/null +++ b/tests/riscv-test-env/encoding.h @@ -0,0 +1,5013 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +/* Copyright (c) 2023 RISC-V International */ + +/* + * This file is auto-generated by running 'make' in + * https://github.com/riscv/riscv-opcodes (02b4866) + */ + +#ifndef RISCV_CSR_ENCODING_H +#define RISCV_CSR_ENCODING_H + +#define MSTATUS_UIE 0x00000001 +#define MSTATUS_SIE 0x00000002 +#define MSTATUS_HIE 0x00000004 +#define MSTATUS_MIE 0x00000008 +#define MSTATUS_UPIE 0x00000010 +#define MSTATUS_SPIE 0x00000020 +#define MSTATUS_UBE 0x00000040 +#define MSTATUS_MPIE 0x00000080 +#define MSTATUS_SPP 0x00000100 +#define MSTATUS_VS 0x00000600 +#define MSTATUS_MPP 0x00001800 +#define MSTATUS_FS 0x00006000 +#define MSTATUS_XS 0x00018000 +#define MSTATUS_MPRV 0x00020000 +#define MSTATUS_SUM 0x00040000 +#define MSTATUS_MXR 0x00080000 +#define MSTATUS_TVM 0x00100000 +#define MSTATUS_TW 0x00200000 +#define MSTATUS_TSR 0x00400000 +#define MSTATUS32_SD 0x80000000 +#define MSTATUS_UXL 0x0000000300000000 +#define MSTATUS_SXL 0x0000000C00000000 +#define MSTATUS_SBE 0x0000001000000000 +#define MSTATUS_MBE 0x0000002000000000 +#define MSTATUS_GVA 0x0000004000000000 +#define MSTATUS_MPV 0x0000008000000000 +#define MSTATUS64_SD 0x8000000000000000 + +#define MSTATUSH_SBE 0x00000010 +#define MSTATUSH_MBE 0x00000020 +#define MSTATUSH_GVA 0x00000040 +#define MSTATUSH_MPV 0x00000080 + +#define SSTATUS_UIE 0x00000001 +#define SSTATUS_SIE 0x00000002 +#define SSTATUS_UPIE 0x00000010 +#define SSTATUS_SPIE 0x00000020 +#define SSTATUS_UBE 0x00000040 +#define SSTATUS_SPP 0x00000100 +#define SSTATUS_VS 0x00000600 +#define SSTATUS_FS 0x00006000 +#define SSTATUS_XS 0x00018000 +#define SSTATUS_SUM 0x00040000 +#define SSTATUS_MXR 0x00080000 +#define SSTATUS32_SD 0x80000000 +#define SSTATUS_UXL 0x0000000300000000 +#define SSTATUS64_SD 0x8000000000000000 + +#define HSTATUS_VSXL 0x300000000 +#define HSTATUS_VTSR 0x00400000 +#define HSTATUS_VTW 0x00200000 +#define HSTATUS_VTVM 0x00100000 +#define HSTATUS_VGEIN 0x0003f000 +#define HSTATUS_HU 0x00000200 +#define HSTATUS_SPVP 0x00000100 +#define HSTATUS_SPV 0x00000080 +#define HSTATUS_GVA 0x00000040 +#define HSTATUS_VSBE 0x00000020 + +#define USTATUS_UIE 0x00000001 +#define USTATUS_UPIE 0x00000010 + +#define MNSTATUS_NMIE 0x00000008 +#define MNSTATUS_MNPP 0x00001800 +#define MNSTATUS_MNPV 0x00000080 + +#define DCSR_XDEBUGVER (3U<<30) +#define DCSR_NDRESET (1<<29) +#define DCSR_FULLRESET (1<<28) +#define DCSR_EBREAKM (1<<15) +#define DCSR_EBREAKH (1<<14) +#define DCSR_EBREAKS (1<<13) +#define DCSR_EBREAKU (1<<12) +#define DCSR_STOPCYCLE (1<<10) +#define DCSR_STOPTIME (1<<9) +#define DCSR_CAUSE (7<<6) +#define DCSR_DEBUGINT (1<<5) +#define DCSR_HALT (1<<3) +#define DCSR_STEP (1<<2) +#define DCSR_PRV (3<<0) + +#define DCSR_CAUSE_NONE 0 +#define DCSR_CAUSE_SWBP 1 +#define DCSR_CAUSE_HWBP 2 +#define DCSR_CAUSE_DEBUGINT 3 +#define DCSR_CAUSE_STEP 4 +#define DCSR_CAUSE_HALT 5 +#define DCSR_CAUSE_GROUP 6 + +#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) +#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) +#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11)) + +#define MCONTROL_SELECT (1<<19) +#define MCONTROL_TIMING (1<<18) +#define MCONTROL_ACTION (0x3f<<12) +#define MCONTROL_CHAIN (1<<11) +#define MCONTROL_MATCH (0xf<<7) +#define MCONTROL_M (1<<6) +#define MCONTROL_H (1<<5) +#define MCONTROL_S (1<<4) +#define MCONTROL_U (1<<3) +#define 
MCONTROL_EXECUTE (1<<2) +#define MCONTROL_STORE (1<<1) +#define MCONTROL_LOAD (1<<0) + +#define MCONTROL_TYPE_NONE 0 +#define MCONTROL_TYPE_MATCH 2 + +#define MCONTROL_ACTION_DEBUG_EXCEPTION 0 +#define MCONTROL_ACTION_DEBUG_MODE 1 +#define MCONTROL_ACTION_TRACE_START 2 +#define MCONTROL_ACTION_TRACE_STOP 3 +#define MCONTROL_ACTION_TRACE_EMIT 4 + +#define MCONTROL_MATCH_EQUAL 0 +#define MCONTROL_MATCH_NAPOT 1 +#define MCONTROL_MATCH_GE 2 +#define MCONTROL_MATCH_LT 3 +#define MCONTROL_MATCH_MASK_LOW 4 +#define MCONTROL_MATCH_MASK_HIGH 5 + +#define MIP_USIP (1 << IRQ_U_SOFT) +#define MIP_SSIP (1 << IRQ_S_SOFT) +#define MIP_VSSIP (1 << IRQ_VS_SOFT) +#define MIP_MSIP (1 << IRQ_M_SOFT) +#define MIP_UTIP (1 << IRQ_U_TIMER) +#define MIP_STIP (1 << IRQ_S_TIMER) +#define MIP_VSTIP (1 << IRQ_VS_TIMER) +#define MIP_MTIP (1 << IRQ_M_TIMER) +#define MIP_UEIP (1 << IRQ_U_EXT) +#define MIP_SEIP (1 << IRQ_S_EXT) +#define MIP_VSEIP (1 << IRQ_VS_EXT) +#define MIP_MEIP (1 << IRQ_M_EXT) +#define MIP_SGEIP (1 << IRQ_S_GEXT) +#define MIP_LCOFIP (1 << IRQ_LCOF) + +#define MIP_S_MASK (MIP_SSIP | MIP_STIP | MIP_SEIP) +#define MIP_VS_MASK (MIP_VSSIP | MIP_VSTIP | MIP_VSEIP) +#define MIP_HS_MASK (MIP_VS_MASK | MIP_SGEIP) + +#define MIDELEG_FORCED_MASK MIP_HS_MASK + +#define SIP_SSIP MIP_SSIP +#define SIP_STIP MIP_STIP + +#define MENVCFG_FIOM 0x00000001 +#define MENVCFG_CBIE 0x00000030 +#define MENVCFG_CBCFE 0x00000040 +#define MENVCFG_CBZE 0x00000080 +#define MENVCFG_HADE 0x2000000000000000 +#define MENVCFG_PBMTE 0x4000000000000000 +#define MENVCFG_STCE 0x8000000000000000 + +#define MENVCFGH_HADE 0x20000000 +#define MENVCFGH_PBMTE 0x40000000 +#define MENVCFGH_STCE 0x80000000 + +#define MSTATEEN0_CS 0x00000001 +#define MSTATEEN0_FCSR 0x00000002 +#define MSTATEEN0_JVT 0x00000004 +#define MSTATEEN0_HCONTEXT 0x0200000000000000 +#define MSTATEEN0_HENVCFG 0x4000000000000000 +#define MSTATEEN_HSTATEEN 0x8000000000000000 + +#define MSTATEEN0H_HCONTEXT 0x02000000 +#define MSTATEEN0H_HENVCFG 0x40000000 +#define MSTATEENH_HSTATEEN 0x80000000 + +#define MHPMEVENT_VUINH 0x0400000000000000 +#define MHPMEVENT_VSINH 0x0800000000000000 +#define MHPMEVENT_UINH 0x1000000000000000 +#define MHPMEVENT_SINH 0x2000000000000000 +#define MHPMEVENT_MINH 0x4000000000000000 +#define MHPMEVENT_OF 0x8000000000000000 + +#define MHPMEVENTH_VUINH 0x04000000 +#define MHPMEVENTH_VSINH 0x08000000 +#define MHPMEVENTH_UINH 0x10000000 +#define MHPMEVENTH_SINH 0x20000000 +#define MHPMEVENTH_MINH 0x40000000 +#define MHPMEVENTH_OF 0x80000000 + +#define HENVCFG_FIOM 0x00000001 +#define HENVCFG_CBIE 0x00000030 +#define HENVCFG_CBCFE 0x00000040 +#define HENVCFG_CBZE 0x00000080 +#define HENVCFG_HADE 0x2000000000000000 +#define HENVCFG_PBMTE 0x4000000000000000 +#define HENVCFG_STCE 0x8000000000000000 + +#define HENVCFGH_HADE 0x20000000 +#define HENVCFGH_PBMTE 0x40000000 +#define HENVCFGH_STCE 0x80000000 + +#define HSTATEEN0_CS 0x00000001 +#define HSTATEEN0_FCSR 0x00000002 +#define HSTATEEN0_JVT 0x00000004 +#define HSTATEEN0_SCONTEXT 0x0200000000000000 +#define HSTATEEN0_SENVCFG 0x4000000000000000 +#define HSTATEEN_SSTATEEN 0x8000000000000000 + +#define HSTATEEN0H_SCONTEXT 0x02000000 +#define HSTATEEN0H_SENVCFG 0x40000000 +#define HSTATEENH_SSTATEEN 0x80000000 + +#define SENVCFG_FIOM 0x00000001 +#define SENVCFG_CBIE 0x00000030 +#define SENVCFG_CBCFE 0x00000040 +#define SENVCFG_CBZE 0x00000080 + +#define SSTATEEN0_CS 0x00000001 +#define SSTATEEN0_FCSR 0x00000002 +#define SSTATEEN0_JVT 0x00000004 + +#define MSECCFG_MML 0x00000001 +#define MSECCFG_MMWP 0x00000002 
+#define MSECCFG_RLB 0x00000004 +#define MSECCFG_USEED 0x00000100 +#define MSECCFG_SSEED 0x00000200 + +/* jvt fields */ +#define JVT_MODE 0x3F +#define JVT_BASE (~0x3F) + +#define PRV_U 0 +#define PRV_S 1 +#define PRV_M 3 + +#define PRV_HS (PRV_S + 1) + +#define SATP32_MODE 0x80000000 +#define SATP32_ASID 0x7FC00000 +#define SATP32_PPN 0x003FFFFF +#define SATP64_MODE 0xF000000000000000 +#define SATP64_ASID 0x0FFFF00000000000 +#define SATP64_PPN 0x00000FFFFFFFFFFF + +#define SATP_MODE_OFF 0 +#define SATP_MODE_SV32 1 +#define SATP_MODE_SV39 8 +#define SATP_MODE_SV48 9 +#define SATP_MODE_SV57 10 +#define SATP_MODE_SV64 11 + +#define HGATP32_MODE 0x80000000 +#define HGATP32_VMID 0x1FC00000 +#define HGATP32_PPN 0x003FFFFF + +#define HGATP64_MODE 0xF000000000000000 +#define HGATP64_VMID 0x03FFF00000000000 +#define HGATP64_PPN 0x00000FFFFFFFFFFF + +#define HGATP_MODE_OFF 0 +#define HGATP_MODE_SV32X4 1 +#define HGATP_MODE_SV39X4 8 +#define HGATP_MODE_SV48X4 9 +#define HGATP_MODE_SV57X4 10 + +#define PMP_R 0x01 +#define PMP_W 0x02 +#define PMP_X 0x04 +#define PMP_A 0x18 +#define PMP_L 0x80 +#define PMP_SHIFT 2 + +#define PMP_TOR 0x08 +#define PMP_NA4 0x10 +#define PMP_NAPOT 0x18 + +#define IRQ_U_SOFT 0 +#define IRQ_S_SOFT 1 +#define IRQ_VS_SOFT 2 +#define IRQ_M_SOFT 3 +#define IRQ_U_TIMER 4 +#define IRQ_S_TIMER 5 +#define IRQ_VS_TIMER 6 +#define IRQ_M_TIMER 7 +#define IRQ_U_EXT 8 +#define IRQ_S_EXT 9 +#define IRQ_VS_EXT 10 +#define IRQ_M_EXT 11 +#define IRQ_S_GEXT 12 +#define IRQ_COP 12 +#define IRQ_LCOF 13 + +#define DEFAULT_RSTVEC 0x00001000 +#define CLINT_BASE 0x02000000 +#define CLINT_SIZE 0x000c0000 +#define EXT_IO_BASE 0x40000000 +#define DRAM_BASE 0x80000000 + +/* page table entry (PTE) fields */ +#define PTE_V 0x001 /* Valid */ +#define PTE_R 0x002 /* Read */ +#define PTE_W 0x004 /* Write */ +#define PTE_X 0x008 /* Execute */ +#define PTE_U 0x010 /* User */ +#define PTE_G 0x020 /* Global */ +#define PTE_A 0x040 /* Accessed */ +#define PTE_D 0x080 /* Dirty */ +#define PTE_SOFT 0x300 /* Reserved for Software */ +#define PTE_RSVD 0x1FC0000000000000 /* Reserved for future standard use */ +#define PTE_PBMT 0x6000000000000000 /* Svpbmt: Page-based memory types */ +#define PTE_N 0x8000000000000000 /* Svnapot: NAPOT translation contiguity */ +#define PTE_ATTR 0xFFC0000000000000 /* All attributes and reserved bits */ + +#define PTE_PPN_SHIFT 10 + +#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) + +#ifdef __riscv + +#if __riscv_xlen == 64 +# define MSTATUS_SD MSTATUS64_SD +# define SSTATUS_SD SSTATUS64_SD +# define RISCV_PGLEVEL_BITS 9 +# define SATP_MODE SATP64_MODE +#else +# define MSTATUS_SD MSTATUS32_SD +# define SSTATUS_SD SSTATUS32_SD +# define RISCV_PGLEVEL_BITS 10 +# define SATP_MODE SATP32_MODE +#endif +#define RISCV_PGSHIFT 12 +#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) + +#ifndef __ASSEMBLER__ + +#ifdef __GNUC__ + +#define read_csr(reg) ({ unsigned long __tmp; \ + asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ + __tmp; }) + +#define write_csr(reg, val) ({ \ + asm volatile ("csrw " #reg ", %0" :: "rK"(val)); }) + +#define swap_csr(reg, val) ({ unsigned long __tmp; \ + asm volatile ("csrrw %0, " #reg ", %1" : "=r"(__tmp) : "rK"(val)); \ + __tmp; }) + +#define set_csr(reg, bit) ({ unsigned long __tmp; \ + asm volatile ("csrrs %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ + __tmp; }) + +#define clear_csr(reg, bit) ({ unsigned long __tmp; \ + asm volatile ("csrrc %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ + __tmp; }) + +#define rdtime() read_csr(time) 
+#define rdcycle() read_csr(cycle) +#define rdinstret() read_csr(instret) + +#endif + +#endif + +#endif + +#endif + +/* Automatically generated by parse_opcodes. */ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +#define MATCH_ADD 0x33 +#define MASK_ADD 0xfe00707f +#define MATCH_ADD16 0x40000077 +#define MASK_ADD16 0xfe00707f +#define MATCH_ADD32 0x40002077 +#define MASK_ADD32 0xfe00707f +#define MATCH_ADD64 0xc0001077 +#define MASK_ADD64 0xfe00707f +#define MATCH_ADD8 0x48000077 +#define MASK_ADD8 0xfe00707f +#define MATCH_ADD_UW 0x800003b +#define MASK_ADD_UW 0xfe00707f +#define MATCH_ADDI 0x13 +#define MASK_ADDI 0x707f +#define MATCH_ADDIW 0x1b +#define MASK_ADDIW 0x707f +#define MATCH_ADDW 0x3b +#define MASK_ADDW 0xfe00707f +#define MATCH_AES32DSI 0x2a000033 +#define MASK_AES32DSI 0x3e00707f +#define MATCH_AES32DSMI 0x2e000033 +#define MASK_AES32DSMI 0x3e00707f +#define MATCH_AES32ESI 0x22000033 +#define MASK_AES32ESI 0x3e00707f +#define MATCH_AES32ESMI 0x26000033 +#define MASK_AES32ESMI 0x3e00707f +#define MATCH_AES64DS 0x3a000033 +#define MASK_AES64DS 0xfe00707f +#define MATCH_AES64DSM 0x3e000033 +#define MASK_AES64DSM 0xfe00707f +#define MATCH_AES64ES 0x32000033 +#define MASK_AES64ES 0xfe00707f +#define MATCH_AES64ESM 0x36000033 +#define MASK_AES64ESM 0xfe00707f +#define MATCH_AES64IM 0x30001013 +#define MASK_AES64IM 0xfff0707f +#define MATCH_AES64KS1I 0x31001013 +#define MASK_AES64KS1I 0xff00707f +#define MATCH_AES64KS2 0x7e000033 +#define MASK_AES64KS2 0xfe00707f +#define MATCH_AMOADD_D 0x302f +#define MASK_AMOADD_D 0xf800707f +#define MATCH_AMOADD_W 0x202f +#define MASK_AMOADD_W 0xf800707f +#define MATCH_AMOAND_D 0x6000302f +#define MASK_AMOAND_D 0xf800707f +#define MATCH_AMOAND_W 0x6000202f +#define MASK_AMOAND_W 0xf800707f +#define MATCH_AMOMAX_D 0xa000302f +#define MASK_AMOMAX_D 0xf800707f +#define MATCH_AMOMAX_W 0xa000202f +#define MASK_AMOMAX_W 0xf800707f +#define MATCH_AMOMAXU_D 0xe000302f +#define MASK_AMOMAXU_D 0xf800707f +#define MATCH_AMOMAXU_W 0xe000202f +#define MASK_AMOMAXU_W 0xf800707f +#define MATCH_AMOMIN_D 0x8000302f +#define MASK_AMOMIN_D 0xf800707f +#define MATCH_AMOMIN_W 0x8000202f +#define MASK_AMOMIN_W 0xf800707f +#define MATCH_AMOMINU_D 0xc000302f +#define MASK_AMOMINU_D 0xf800707f +#define MATCH_AMOMINU_W 0xc000202f +#define MASK_AMOMINU_W 0xf800707f +#define MATCH_AMOOR_D 0x4000302f +#define MASK_AMOOR_D 0xf800707f +#define MATCH_AMOOR_W 0x4000202f +#define MASK_AMOOR_W 0xf800707f +#define MATCH_AMOSWAP_D 0x800302f +#define MASK_AMOSWAP_D 0xf800707f +#define MATCH_AMOSWAP_W 0x800202f +#define MASK_AMOSWAP_W 0xf800707f +#define MATCH_AMOXOR_D 0x2000302f +#define MASK_AMOXOR_D 0xf800707f +#define MATCH_AMOXOR_W 0x2000202f +#define MASK_AMOXOR_W 0xf800707f +#define MATCH_AND 0x7033 +#define MASK_AND 0xfe00707f +#define MATCH_ANDI 0x7013 +#define MASK_ANDI 0x707f +#define MATCH_ANDN 0x40007033 +#define MASK_ANDN 0xfe00707f +#define MATCH_AUIPC 0x17 +#define MASK_AUIPC 0x7f +#define MATCH_AVE 0xe0000077 +#define MASK_AVE 0xfe00707f +#define MATCH_BCLR 0x48001033 +#define MASK_BCLR 0xfe00707f +#define MATCH_BCLRI 0x48001013 +#define MASK_BCLRI 0xfc00707f +#define MATCH_BCOMPRESS 0x8006033 +#define MASK_BCOMPRESS 0xfe00707f +#define MATCH_BCOMPRESSW 0x800603b +#define MASK_BCOMPRESSW 0xfe00707f +#define MATCH_BDECOMPRESS 0x48006033 +#define MASK_BDECOMPRESS 0xfe00707f +#define MATCH_BDECOMPRESSW 0x4800603b +#define MASK_BDECOMPRESSW 0xfe00707f +#define MATCH_BEQ 0x63 +#define MASK_BEQ 0x707f +#define MATCH_BEXT 0x48005033 +#define MASK_BEXT 0xfe00707f 
+#define MATCH_BEXTI 0x48005013 +#define MASK_BEXTI 0xfc00707f +#define MATCH_BFP 0x48007033 +#define MASK_BFP 0xfe00707f +#define MATCH_BFPW 0x4800703b +#define MASK_BFPW 0xfe00707f +#define MATCH_BGE 0x5063 +#define MASK_BGE 0x707f +#define MATCH_BGEU 0x7063 +#define MASK_BGEU 0x707f +#define MATCH_BINV 0x68001033 +#define MASK_BINV 0xfe00707f +#define MATCH_BINVI 0x68001013 +#define MASK_BINVI 0xfc00707f +#define MATCH_BLT 0x4063 +#define MASK_BLT 0x707f +#define MATCH_BLTU 0x6063 +#define MASK_BLTU 0x707f +#define MATCH_BMATFLIP 0x60301013 +#define MASK_BMATFLIP 0xfff0707f +#define MATCH_BMATOR 0x8003033 +#define MASK_BMATOR 0xfe00707f +#define MATCH_BMATXOR 0x48003033 +#define MASK_BMATXOR 0xfe00707f +#define MATCH_BNE 0x1063 +#define MASK_BNE 0x707f +#define MATCH_BSET 0x28001033 +#define MASK_BSET 0xfe00707f +#define MATCH_BSETI 0x28001013 +#define MASK_BSETI 0xfc00707f +#define MATCH_C_ADD 0x9002 +#define MASK_C_ADD 0xf003 +#define MATCH_C_ADDI 0x1 +#define MASK_C_ADDI 0xe003 +#define MATCH_C_ADDI16SP 0x6101 +#define MASK_C_ADDI16SP 0xef83 +#define MATCH_C_ADDI4SPN 0x0 +#define MASK_C_ADDI4SPN 0xe003 +#define MATCH_C_ADDIW 0x2001 +#define MASK_C_ADDIW 0xe003 +#define MATCH_C_ADDW 0x9c21 +#define MASK_C_ADDW 0xfc63 +#define MATCH_C_AND 0x8c61 +#define MASK_C_AND 0xfc63 +#define MATCH_C_ANDI 0x8801 +#define MASK_C_ANDI 0xec03 +#define MATCH_C_BEQZ 0xc001 +#define MASK_C_BEQZ 0xe003 +#define MATCH_C_BNEZ 0xe001 +#define MASK_C_BNEZ 0xe003 +#define MATCH_C_EBREAK 0x9002 +#define MASK_C_EBREAK 0xffff +#define MATCH_C_FLD 0x2000 +#define MASK_C_FLD 0xe003 +#define MATCH_C_FLDSP 0x2002 +#define MASK_C_FLDSP 0xe003 +#define MATCH_C_FLW 0x6000 +#define MASK_C_FLW 0xe003 +#define MATCH_C_FLWSP 0x6002 +#define MASK_C_FLWSP 0xe003 +#define MATCH_C_FSD 0xa000 +#define MASK_C_FSD 0xe003 +#define MATCH_C_FSDSP 0xa002 +#define MASK_C_FSDSP 0xe003 +#define MATCH_C_FSW 0xe000 +#define MASK_C_FSW 0xe003 +#define MATCH_C_FSWSP 0xe002 +#define MASK_C_FSWSP 0xe003 +#define MATCH_C_J 0xa001 +#define MASK_C_J 0xe003 +#define MATCH_C_JAL 0x2001 +#define MASK_C_JAL 0xe003 +#define MATCH_C_JALR 0x9002 +#define MASK_C_JALR 0xf07f +#define MATCH_C_JR 0x8002 +#define MASK_C_JR 0xf07f +#define MATCH_C_LBU 0x8000 +#define MASK_C_LBU 0xfc03 +#define MATCH_C_LD 0x6000 +#define MASK_C_LD 0xe003 +#define MATCH_C_LDSP 0x6002 +#define MASK_C_LDSP 0xe003 +#define MATCH_C_LH 0x8440 +#define MASK_C_LH 0xfc43 +#define MATCH_C_LHU 0x8400 +#define MASK_C_LHU 0xfc43 +#define MATCH_C_LI 0x4001 +#define MASK_C_LI 0xe003 +#define MATCH_C_LUI 0x6001 +#define MASK_C_LUI 0xe003 +#define MATCH_C_LW 0x4000 +#define MASK_C_LW 0xe003 +#define MATCH_C_LWSP 0x4002 +#define MASK_C_LWSP 0xe003 +#define MATCH_C_MUL 0x9c41 +#define MASK_C_MUL 0xfc63 +#define MATCH_C_MV 0x8002 +#define MASK_C_MV 0xf003 +#define MATCH_C_NOP 0x1 +#define MASK_C_NOP 0xef83 +#define MATCH_C_NOT 0x9c75 +#define MASK_C_NOT 0xfc7f +#define MATCH_C_OR 0x8c41 +#define MASK_C_OR 0xfc63 +#define MATCH_C_SB 0x8800 +#define MASK_C_SB 0xfc03 +#define MATCH_C_SD 0xe000 +#define MASK_C_SD 0xe003 +#define MATCH_C_SDSP 0xe002 +#define MASK_C_SDSP 0xe003 +#define MATCH_C_SEXT_B 0x9c65 +#define MASK_C_SEXT_B 0xfc7f +#define MATCH_C_SEXT_H 0x9c6d +#define MASK_C_SEXT_H 0xfc7f +#define MATCH_C_SH 0x8c00 +#define MASK_C_SH 0xfc43 +#define MATCH_C_SLLI 0x2 +#define MASK_C_SLLI 0xe003 +#define MATCH_C_SRAI 0x8401 +#define MASK_C_SRAI 0xec03 +#define MATCH_C_SRLI 0x8001 +#define MASK_C_SRLI 0xec03 +#define MATCH_C_SUB 0x8c01 +#define MASK_C_SUB 0xfc63 +#define MATCH_C_SUBW 0x9c01 
+#define MASK_C_SUBW 0xfc63 +#define MATCH_C_SW 0xc000 +#define MASK_C_SW 0xe003 +#define MATCH_C_SWSP 0xc002 +#define MASK_C_SWSP 0xe003 +#define MATCH_C_XOR 0x8c21 +#define MASK_C_XOR 0xfc63 +#define MATCH_C_ZEXT_B 0x9c61 +#define MASK_C_ZEXT_B 0xfc7f +#define MATCH_C_ZEXT_H 0x9c69 +#define MASK_C_ZEXT_H 0xfc7f +#define MATCH_C_ZEXT_W 0x9c71 +#define MASK_C_ZEXT_W 0xfc7f +#define MATCH_CBO_CLEAN 0x10200f +#define MASK_CBO_CLEAN 0xfff07fff +#define MATCH_CBO_FLUSH 0x20200f +#define MASK_CBO_FLUSH 0xfff07fff +#define MATCH_CBO_INVAL 0x200f +#define MASK_CBO_INVAL 0xfff07fff +#define MATCH_CBO_ZERO 0x40200f +#define MASK_CBO_ZERO 0xfff07fff +#define MATCH_CLMUL 0xa001033 +#define MASK_CLMUL 0xfe00707f +#define MATCH_CLMULH 0xa003033 +#define MASK_CLMULH 0xfe00707f +#define MATCH_CLMULR 0xa002033 +#define MASK_CLMULR 0xfe00707f +#define MATCH_CLRS16 0xae800077 +#define MASK_CLRS16 0xfff0707f +#define MATCH_CLRS32 0xaf800077 +#define MASK_CLRS32 0xfff0707f +#define MATCH_CLRS8 0xae000077 +#define MASK_CLRS8 0xfff0707f +#define MATCH_CLZ 0x60001013 +#define MASK_CLZ 0xfff0707f +#define MATCH_CLZ16 0xae900077 +#define MASK_CLZ16 0xfff0707f +#define MATCH_CLZ32 0xaf900077 +#define MASK_CLZ32 0xfff0707f +#define MATCH_CLZ8 0xae100077 +#define MASK_CLZ8 0xfff0707f +#define MATCH_CLZW 0x6000101b +#define MASK_CLZW 0xfff0707f +#define MATCH_CM_JALT 0xa002 +#define MASK_CM_JALT 0xfc03 +#define MATCH_CM_MVA01S 0xac62 +#define MASK_CM_MVA01S 0xfc63 +#define MATCH_CM_MVSA01 0xac22 +#define MASK_CM_MVSA01 0xfc63 +#define MATCH_CM_POP 0xba02 +#define MASK_CM_POP 0xff03 +#define MATCH_CM_POPRET 0xbe02 +#define MASK_CM_POPRET 0xff03 +#define MATCH_CM_POPRETZ 0xbc02 +#define MASK_CM_POPRETZ 0xff03 +#define MATCH_CM_PUSH 0xb802 +#define MASK_CM_PUSH 0xff03 +#define MATCH_CMIX 0x6001033 +#define MASK_CMIX 0x600707f +#define MATCH_CMOV 0x6005033 +#define MASK_CMOV 0x600707f +#define MATCH_CMPEQ16 0x4c000077 +#define MASK_CMPEQ16 0xfe00707f +#define MATCH_CMPEQ8 0x4e000077 +#define MASK_CMPEQ8 0xfe00707f +#define MATCH_CPOP 0x60201013 +#define MASK_CPOP 0xfff0707f +#define MATCH_CPOPW 0x6020101b +#define MASK_CPOPW 0xfff0707f +#define MATCH_CRAS16 0x44000077 +#define MASK_CRAS16 0xfe00707f +#define MATCH_CRAS32 0x44002077 +#define MASK_CRAS32 0xfe00707f +#define MATCH_CRC32_B 0x61001013 +#define MASK_CRC32_B 0xfff0707f +#define MATCH_CRC32_D 0x61301013 +#define MASK_CRC32_D 0xfff0707f +#define MATCH_CRC32_H 0x61101013 +#define MASK_CRC32_H 0xfff0707f +#define MATCH_CRC32_W 0x61201013 +#define MASK_CRC32_W 0xfff0707f +#define MATCH_CRC32C_B 0x61801013 +#define MASK_CRC32C_B 0xfff0707f +#define MATCH_CRC32C_D 0x61b01013 +#define MASK_CRC32C_D 0xfff0707f +#define MATCH_CRC32C_H 0x61901013 +#define MASK_CRC32C_H 0xfff0707f +#define MATCH_CRC32C_W 0x61a01013 +#define MASK_CRC32C_W 0xfff0707f +#define MATCH_CRSA16 0x46000077 +#define MASK_CRSA16 0xfe00707f +#define MATCH_CRSA32 0x46002077 +#define MASK_CRSA32 0xfe00707f +#define MATCH_CSRRC 0x3073 +#define MASK_CSRRC 0x707f +#define MATCH_CSRRCI 0x7073 +#define MASK_CSRRCI 0x707f +#define MATCH_CSRRS 0x2073 +#define MASK_CSRRS 0x707f +#define MATCH_CSRRSI 0x6073 +#define MASK_CSRRSI 0x707f +#define MATCH_CSRRW 0x1073 +#define MASK_CSRRW 0x707f +#define MATCH_CSRRWI 0x5073 +#define MASK_CSRRWI 0x707f +#define MATCH_CTZ 0x60101013 +#define MASK_CTZ 0xfff0707f +#define MATCH_CTZW 0x6010101b +#define MASK_CTZW 0xfff0707f +#define MATCH_CZERO_EQZ 0xe005033 +#define MASK_CZERO_EQZ 0xfe00707f +#define MATCH_CZERO_NEZ 0xe007033 +#define MASK_CZERO_NEZ 0xfe00707f +#define 
MATCH_DIV 0x2004033 +#define MASK_DIV 0xfe00707f +#define MATCH_DIVU 0x2005033 +#define MASK_DIVU 0xfe00707f +#define MATCH_DIVUW 0x200503b +#define MASK_DIVUW 0xfe00707f +#define MATCH_DIVW 0x200403b +#define MASK_DIVW 0xfe00707f +#define MATCH_DRET 0x7b200073 +#define MASK_DRET 0xffffffff +#define MATCH_EBREAK 0x100073 +#define MASK_EBREAK 0xffffffff +#define MATCH_ECALL 0x73 +#define MASK_ECALL 0xffffffff +#define MATCH_FADD_D 0x2000053 +#define MASK_FADD_D 0xfe00007f +#define MATCH_FADD_H 0x4000053 +#define MASK_FADD_H 0xfe00007f +#define MATCH_FADD_Q 0x6000053 +#define MASK_FADD_Q 0xfe00007f +#define MATCH_FADD_S 0x53 +#define MASK_FADD_S 0xfe00007f +#define MATCH_FCLASS_D 0xe2001053 +#define MASK_FCLASS_D 0xfff0707f +#define MATCH_FCLASS_H 0xe4001053 +#define MASK_FCLASS_H 0xfff0707f +#define MATCH_FCLASS_Q 0xe6001053 +#define MASK_FCLASS_Q 0xfff0707f +#define MATCH_FCLASS_S 0xe0001053 +#define MASK_FCLASS_S 0xfff0707f +#define MATCH_FCVT_D_H 0x42200053 +#define MASK_FCVT_D_H 0xfff0007f +#define MATCH_FCVT_D_L 0xd2200053 +#define MASK_FCVT_D_L 0xfff0007f +#define MATCH_FCVT_D_LU 0xd2300053 +#define MASK_FCVT_D_LU 0xfff0007f +#define MATCH_FCVT_D_Q 0x42300053 +#define MASK_FCVT_D_Q 0xfff0007f +#define MATCH_FCVT_D_S 0x42000053 +#define MASK_FCVT_D_S 0xfff0007f +#define MATCH_FCVT_D_W 0xd2000053 +#define MASK_FCVT_D_W 0xfff0007f +#define MATCH_FCVT_D_WU 0xd2100053 +#define MASK_FCVT_D_WU 0xfff0007f +#define MATCH_FCVT_H_D 0x44100053 +#define MASK_FCVT_H_D 0xfff0007f +#define MATCH_FCVT_H_L 0xd4200053 +#define MASK_FCVT_H_L 0xfff0007f +#define MATCH_FCVT_H_LU 0xd4300053 +#define MASK_FCVT_H_LU 0xfff0007f +#define MATCH_FCVT_H_Q 0x44300053 +#define MASK_FCVT_H_Q 0xfff0007f +#define MATCH_FCVT_H_S 0x44000053 +#define MASK_FCVT_H_S 0xfff0007f +#define MATCH_FCVT_H_W 0xd4000053 +#define MASK_FCVT_H_W 0xfff0007f +#define MATCH_FCVT_H_WU 0xd4100053 +#define MASK_FCVT_H_WU 0xfff0007f +#define MATCH_FCVT_L_D 0xc2200053 +#define MASK_FCVT_L_D 0xfff0007f +#define MATCH_FCVT_L_H 0xc4200053 +#define MASK_FCVT_L_H 0xfff0007f +#define MATCH_FCVT_L_Q 0xc6200053 +#define MASK_FCVT_L_Q 0xfff0007f +#define MATCH_FCVT_L_S 0xc0200053 +#define MASK_FCVT_L_S 0xfff0007f +#define MATCH_FCVT_LU_D 0xc2300053 +#define MASK_FCVT_LU_D 0xfff0007f +#define MATCH_FCVT_LU_H 0xc4300053 +#define MASK_FCVT_LU_H 0xfff0007f +#define MATCH_FCVT_LU_Q 0xc6300053 +#define MASK_FCVT_LU_Q 0xfff0007f +#define MATCH_FCVT_LU_S 0xc0300053 +#define MASK_FCVT_LU_S 0xfff0007f +#define MATCH_FCVT_Q_D 0x46100053 +#define MASK_FCVT_Q_D 0xfff0007f +#define MATCH_FCVT_Q_H 0x46200053 +#define MASK_FCVT_Q_H 0xfff0007f +#define MATCH_FCVT_Q_L 0xd6200053 +#define MASK_FCVT_Q_L 0xfff0007f +#define MATCH_FCVT_Q_LU 0xd6300053 +#define MASK_FCVT_Q_LU 0xfff0007f +#define MATCH_FCVT_Q_S 0x46000053 +#define MASK_FCVT_Q_S 0xfff0007f +#define MATCH_FCVT_Q_W 0xd6000053 +#define MASK_FCVT_Q_W 0xfff0007f +#define MATCH_FCVT_Q_WU 0xd6100053 +#define MASK_FCVT_Q_WU 0xfff0007f +#define MATCH_FCVT_S_D 0x40100053 +#define MASK_FCVT_S_D 0xfff0007f +#define MATCH_FCVT_S_H 0x40200053 +#define MASK_FCVT_S_H 0xfff0007f +#define MATCH_FCVT_S_L 0xd0200053 +#define MASK_FCVT_S_L 0xfff0007f +#define MATCH_FCVT_S_LU 0xd0300053 +#define MASK_FCVT_S_LU 0xfff0007f +#define MATCH_FCVT_S_Q 0x40300053 +#define MASK_FCVT_S_Q 0xfff0007f +#define MATCH_FCVT_S_W 0xd0000053 +#define MASK_FCVT_S_W 0xfff0007f +#define MATCH_FCVT_S_WU 0xd0100053 +#define MASK_FCVT_S_WU 0xfff0007f +#define MATCH_FCVT_W_D 0xc2000053 +#define MASK_FCVT_W_D 0xfff0007f +#define MATCH_FCVT_W_H 0xc4000053 
+#define MASK_FCVT_W_H 0xfff0007f +#define MATCH_FCVT_W_Q 0xc6000053 +#define MASK_FCVT_W_Q 0xfff0007f +#define MATCH_FCVT_W_S 0xc0000053 +#define MASK_FCVT_W_S 0xfff0007f +#define MATCH_FCVT_WU_D 0xc2100053 +#define MASK_FCVT_WU_D 0xfff0007f +#define MATCH_FCVT_WU_H 0xc4100053 +#define MASK_FCVT_WU_H 0xfff0007f +#define MATCH_FCVT_WU_Q 0xc6100053 +#define MASK_FCVT_WU_Q 0xfff0007f +#define MATCH_FCVT_WU_S 0xc0100053 +#define MASK_FCVT_WU_S 0xfff0007f +#define MATCH_FDIV_D 0x1a000053 +#define MASK_FDIV_D 0xfe00007f +#define MATCH_FDIV_H 0x1c000053 +#define MASK_FDIV_H 0xfe00007f +#define MATCH_FDIV_Q 0x1e000053 +#define MASK_FDIV_Q 0xfe00007f +#define MATCH_FDIV_S 0x18000053 +#define MASK_FDIV_S 0xfe00007f +#define MATCH_FENCE 0xf +#define MASK_FENCE 0x707f +#define MATCH_FENCE_I 0x100f +#define MASK_FENCE_I 0x707f +#define MATCH_FEQ_D 0xa2002053 +#define MASK_FEQ_D 0xfe00707f +#define MATCH_FEQ_H 0xa4002053 +#define MASK_FEQ_H 0xfe00707f +#define MATCH_FEQ_Q 0xa6002053 +#define MASK_FEQ_Q 0xfe00707f +#define MATCH_FEQ_S 0xa0002053 +#define MASK_FEQ_S 0xfe00707f +#define MATCH_FLD 0x3007 +#define MASK_FLD 0x707f +#define MATCH_FLE_D 0xa2000053 +#define MASK_FLE_D 0xfe00707f +#define MATCH_FLE_H 0xa4000053 +#define MASK_FLE_H 0xfe00707f +#define MATCH_FLE_Q 0xa6000053 +#define MASK_FLE_Q 0xfe00707f +#define MATCH_FLE_S 0xa0000053 +#define MASK_FLE_S 0xfe00707f +#define MATCH_FLH 0x1007 +#define MASK_FLH 0x707f +#define MATCH_FLQ 0x4007 +#define MASK_FLQ 0x707f +#define MATCH_FLT_D 0xa2001053 +#define MASK_FLT_D 0xfe00707f +#define MATCH_FLT_H 0xa4001053 +#define MASK_FLT_H 0xfe00707f +#define MATCH_FLT_Q 0xa6001053 +#define MASK_FLT_Q 0xfe00707f +#define MATCH_FLT_S 0xa0001053 +#define MASK_FLT_S 0xfe00707f +#define MATCH_FLW 0x2007 +#define MASK_FLW 0x707f +#define MATCH_FMADD_D 0x2000043 +#define MASK_FMADD_D 0x600007f +#define MATCH_FMADD_H 0x4000043 +#define MASK_FMADD_H 0x600007f +#define MATCH_FMADD_Q 0x6000043 +#define MASK_FMADD_Q 0x600007f +#define MATCH_FMADD_S 0x43 +#define MASK_FMADD_S 0x600007f +#define MATCH_FMAX_D 0x2a001053 +#define MASK_FMAX_D 0xfe00707f +#define MATCH_FMAX_H 0x2c001053 +#define MASK_FMAX_H 0xfe00707f +#define MATCH_FMAX_Q 0x2e001053 +#define MASK_FMAX_Q 0xfe00707f +#define MATCH_FMAX_S 0x28001053 +#define MASK_FMAX_S 0xfe00707f +#define MATCH_FMIN_D 0x2a000053 +#define MASK_FMIN_D 0xfe00707f +#define MATCH_FMIN_H 0x2c000053 +#define MASK_FMIN_H 0xfe00707f +#define MATCH_FMIN_Q 0x2e000053 +#define MASK_FMIN_Q 0xfe00707f +#define MATCH_FMIN_S 0x28000053 +#define MASK_FMIN_S 0xfe00707f +#define MATCH_FMSUB_D 0x2000047 +#define MASK_FMSUB_D 0x600007f +#define MATCH_FMSUB_H 0x4000047 +#define MASK_FMSUB_H 0x600007f +#define MATCH_FMSUB_Q 0x6000047 +#define MASK_FMSUB_Q 0x600007f +#define MATCH_FMSUB_S 0x47 +#define MASK_FMSUB_S 0x600007f +#define MATCH_FMUL_D 0x12000053 +#define MASK_FMUL_D 0xfe00007f +#define MATCH_FMUL_H 0x14000053 +#define MASK_FMUL_H 0xfe00007f +#define MATCH_FMUL_Q 0x16000053 +#define MASK_FMUL_Q 0xfe00007f +#define MATCH_FMUL_S 0x10000053 +#define MASK_FMUL_S 0xfe00007f +#define MATCH_FMV_D_X 0xf2000053 +#define MASK_FMV_D_X 0xfff0707f +#define MATCH_FMV_H_X 0xf4000053 +#define MASK_FMV_H_X 0xfff0707f +#define MATCH_FMV_W_X 0xf0000053 +#define MASK_FMV_W_X 0xfff0707f +#define MATCH_FMV_X_D 0xe2000053 +#define MASK_FMV_X_D 0xfff0707f +#define MATCH_FMV_X_H 0xe4000053 +#define MASK_FMV_X_H 0xfff0707f +#define MATCH_FMV_X_W 0xe0000053 +#define MASK_FMV_X_W 0xfff0707f +#define MATCH_FNMADD_D 0x200004f +#define MASK_FNMADD_D 0x600007f +#define 
MATCH_FNMADD_H 0x400004f +#define MASK_FNMADD_H 0x600007f +#define MATCH_FNMADD_Q 0x600004f +#define MASK_FNMADD_Q 0x600007f +#define MATCH_FNMADD_S 0x4f +#define MASK_FNMADD_S 0x600007f +#define MATCH_FNMSUB_D 0x200004b +#define MASK_FNMSUB_D 0x600007f +#define MATCH_FNMSUB_H 0x400004b +#define MASK_FNMSUB_H 0x600007f +#define MATCH_FNMSUB_Q 0x600004b +#define MASK_FNMSUB_Q 0x600007f +#define MATCH_FNMSUB_S 0x4b +#define MASK_FNMSUB_S 0x600007f +#define MATCH_FSD 0x3027 +#define MASK_FSD 0x707f +#define MATCH_FSGNJ_D 0x22000053 +#define MASK_FSGNJ_D 0xfe00707f +#define MATCH_FSGNJ_H 0x24000053 +#define MASK_FSGNJ_H 0xfe00707f +#define MATCH_FSGNJ_Q 0x26000053 +#define MASK_FSGNJ_Q 0xfe00707f +#define MATCH_FSGNJ_S 0x20000053 +#define MASK_FSGNJ_S 0xfe00707f +#define MATCH_FSGNJN_D 0x22001053 +#define MASK_FSGNJN_D 0xfe00707f +#define MATCH_FSGNJN_H 0x24001053 +#define MASK_FSGNJN_H 0xfe00707f +#define MATCH_FSGNJN_Q 0x26001053 +#define MASK_FSGNJN_Q 0xfe00707f +#define MATCH_FSGNJN_S 0x20001053 +#define MASK_FSGNJN_S 0xfe00707f +#define MATCH_FSGNJX_D 0x22002053 +#define MASK_FSGNJX_D 0xfe00707f +#define MATCH_FSGNJX_H 0x24002053 +#define MASK_FSGNJX_H 0xfe00707f +#define MATCH_FSGNJX_Q 0x26002053 +#define MASK_FSGNJX_Q 0xfe00707f +#define MATCH_FSGNJX_S 0x20002053 +#define MASK_FSGNJX_S 0xfe00707f +#define MATCH_FSH 0x1027 +#define MASK_FSH 0x707f +#define MATCH_FSL 0x4001033 +#define MASK_FSL 0x600707f +#define MATCH_FSLW 0x400103b +#define MASK_FSLW 0x600707f +#define MATCH_FSQ 0x4027 +#define MASK_FSQ 0x707f +#define MATCH_FSQRT_D 0x5a000053 +#define MASK_FSQRT_D 0xfff0007f +#define MATCH_FSQRT_H 0x5c000053 +#define MASK_FSQRT_H 0xfff0007f +#define MATCH_FSQRT_Q 0x5e000053 +#define MASK_FSQRT_Q 0xfff0007f +#define MATCH_FSQRT_S 0x58000053 +#define MASK_FSQRT_S 0xfff0007f +#define MATCH_FSR 0x4005033 +#define MASK_FSR 0x600707f +#define MATCH_FSRI 0x4005013 +#define MASK_FSRI 0x400707f +#define MATCH_FSRIW 0x400501b +#define MASK_FSRIW 0x600707f +#define MATCH_FSRW 0x400503b +#define MASK_FSRW 0x600707f +#define MATCH_FSUB_D 0xa000053 +#define MASK_FSUB_D 0xfe00007f +#define MATCH_FSUB_H 0xc000053 +#define MASK_FSUB_H 0xfe00007f +#define MATCH_FSUB_Q 0xe000053 +#define MASK_FSUB_Q 0xfe00007f +#define MATCH_FSUB_S 0x8000053 +#define MASK_FSUB_S 0xfe00007f +#define MATCH_FSW 0x2027 +#define MASK_FSW 0x707f +#define MATCH_GORC 0x28005033 +#define MASK_GORC 0xfe00707f +#define MATCH_GORCI 0x28005013 +#define MASK_GORCI 0xfc00707f +#define MATCH_GORCIW 0x2800501b +#define MASK_GORCIW 0xfe00707f +#define MATCH_GORCW 0x2800503b +#define MASK_GORCW 0xfe00707f +#define MATCH_GREV 0x68005033 +#define MASK_GREV 0xfe00707f +#define MATCH_GREVI 0x68005013 +#define MASK_GREVI 0xfc00707f +#define MATCH_GREVIW 0x6800501b +#define MASK_GREVIW 0xfe00707f +#define MATCH_GREVW 0x6800503b +#define MASK_GREVW 0xfe00707f +#define MATCH_HFENCE_GVMA 0x62000073 +#define MASK_HFENCE_GVMA 0xfe007fff +#define MATCH_HFENCE_VVMA 0x22000073 +#define MASK_HFENCE_VVMA 0xfe007fff +#define MATCH_HINVAL_GVMA 0x66000073 +#define MASK_HINVAL_GVMA 0xfe007fff +#define MATCH_HINVAL_VVMA 0x26000073 +#define MASK_HINVAL_VVMA 0xfe007fff +#define MATCH_HLV_B 0x60004073 +#define MASK_HLV_B 0xfff0707f +#define MATCH_HLV_BU 0x60104073 +#define MASK_HLV_BU 0xfff0707f +#define MATCH_HLV_D 0x6c004073 +#define MASK_HLV_D 0xfff0707f +#define MATCH_HLV_H 0x64004073 +#define MASK_HLV_H 0xfff0707f +#define MATCH_HLV_HU 0x64104073 +#define MASK_HLV_HU 0xfff0707f +#define MATCH_HLV_W 0x68004073 +#define MASK_HLV_W 0xfff0707f +#define 
MATCH_HLV_WU 0x68104073 +#define MASK_HLV_WU 0xfff0707f +#define MATCH_HLVX_HU 0x64304073 +#define MASK_HLVX_HU 0xfff0707f +#define MATCH_HLVX_WU 0x68304073 +#define MASK_HLVX_WU 0xfff0707f +#define MATCH_HSV_B 0x62004073 +#define MASK_HSV_B 0xfe007fff +#define MATCH_HSV_D 0x6e004073 +#define MASK_HSV_D 0xfe007fff +#define MATCH_HSV_H 0x66004073 +#define MASK_HSV_H 0xfe007fff +#define MATCH_HSV_W 0x6a004073 +#define MASK_HSV_W 0xfe007fff +#define MATCH_INSB 0xac000077 +#define MASK_INSB 0xff80707f +#define MATCH_JAL 0x6f +#define MASK_JAL 0x7f +#define MATCH_JALR 0x67 +#define MASK_JALR 0x707f +#define MATCH_KABS16 0xad100077 +#define MASK_KABS16 0xfff0707f +#define MATCH_KABS32 0xad200077 +#define MASK_KABS32 0xfff0707f +#define MATCH_KABS8 0xad000077 +#define MASK_KABS8 0xfff0707f +#define MATCH_KABSW 0xad400077 +#define MASK_KABSW 0xfff0707f +#define MATCH_KADD16 0x10000077 +#define MASK_KADD16 0xfe00707f +#define MATCH_KADD32 0x10002077 +#define MASK_KADD32 0xfe00707f +#define MATCH_KADD64 0x90001077 +#define MASK_KADD64 0xfe00707f +#define MATCH_KADD8 0x18000077 +#define MASK_KADD8 0xfe00707f +#define MATCH_KADDH 0x4001077 +#define MASK_KADDH 0xfe00707f +#define MATCH_KADDW 0x1077 +#define MASK_KADDW 0xfe00707f +#define MATCH_KCRAS16 0x14000077 +#define MASK_KCRAS16 0xfe00707f +#define MATCH_KCRAS32 0x14002077 +#define MASK_KCRAS32 0xfe00707f +#define MATCH_KCRSA16 0x16000077 +#define MASK_KCRSA16 0xfe00707f +#define MATCH_KCRSA32 0x16002077 +#define MASK_KCRSA32 0xfe00707f +#define MATCH_KDMABB 0xd2001077 +#define MASK_KDMABB 0xfe00707f +#define MATCH_KDMABB16 0xd8001077 +#define MASK_KDMABB16 0xfe00707f +#define MATCH_KDMABT 0xe2001077 +#define MASK_KDMABT 0xfe00707f +#define MATCH_KDMABT16 0xe8001077 +#define MASK_KDMABT16 0xfe00707f +#define MATCH_KDMATT 0xf2001077 +#define MASK_KDMATT 0xfe00707f +#define MATCH_KDMATT16 0xf8001077 +#define MASK_KDMATT16 0xfe00707f +#define MATCH_KDMBB 0xa001077 +#define MASK_KDMBB 0xfe00707f +#define MATCH_KDMBB16 0xda001077 +#define MASK_KDMBB16 0xfe00707f +#define MATCH_KDMBT 0x1a001077 +#define MASK_KDMBT 0xfe00707f +#define MATCH_KDMBT16 0xea001077 +#define MASK_KDMBT16 0xfe00707f +#define MATCH_KDMTT 0x2a001077 +#define MASK_KDMTT 0xfe00707f +#define MATCH_KDMTT16 0xfa001077 +#define MASK_KDMTT16 0xfe00707f +#define MATCH_KHM16 0x86000077 +#define MASK_KHM16 0xfe00707f +#define MATCH_KHM8 0x8e000077 +#define MASK_KHM8 0xfe00707f +#define MATCH_KHMBB 0xc001077 +#define MASK_KHMBB 0xfe00707f +#define MATCH_KHMBB16 0xdc001077 +#define MASK_KHMBB16 0xfe00707f +#define MATCH_KHMBT 0x1c001077 +#define MASK_KHMBT 0xfe00707f +#define MATCH_KHMBT16 0xec001077 +#define MASK_KHMBT16 0xfe00707f +#define MATCH_KHMTT 0x2c001077 +#define MASK_KHMTT 0xfe00707f +#define MATCH_KHMTT16 0xfc001077 +#define MASK_KHMTT16 0xfe00707f +#define MATCH_KHMX16 0x96000077 +#define MASK_KHMX16 0xfe00707f +#define MATCH_KHMX8 0x9e000077 +#define MASK_KHMX8 0xfe00707f +#define MATCH_KMABB 0x5a001077 +#define MASK_KMABB 0xfe00707f +#define MATCH_KMABB32 0x5a002077 +#define MASK_KMABB32 0xfe00707f +#define MATCH_KMABT 0x6a001077 +#define MASK_KMABT 0xfe00707f +#define MATCH_KMABT32 0x6a002077 +#define MASK_KMABT32 0xfe00707f +#define MATCH_KMADA 0x48001077 +#define MASK_KMADA 0xfe00707f +#define MATCH_KMADRS 0x6c001077 +#define MASK_KMADRS 0xfe00707f +#define MATCH_KMADRS32 0x6c002077 +#define MASK_KMADRS32 0xfe00707f +#define MATCH_KMADS 0x5c001077 +#define MASK_KMADS 0xfe00707f +#define MATCH_KMADS32 0x5c002077 +#define MASK_KMADS32 0xfe00707f +#define MATCH_KMAR64 
0x94001077 +#define MASK_KMAR64 0xfe00707f +#define MATCH_KMATT 0x7a001077 +#define MASK_KMATT 0xfe00707f +#define MATCH_KMATT32 0x7a002077 +#define MASK_KMATT32 0xfe00707f +#define MATCH_KMAXDA 0x4a001077 +#define MASK_KMAXDA 0xfe00707f +#define MATCH_KMAXDA32 0x4a002077 +#define MASK_KMAXDA32 0xfe00707f +#define MATCH_KMAXDS 0x7c001077 +#define MASK_KMAXDS 0xfe00707f +#define MATCH_KMAXDS32 0x7c002077 +#define MASK_KMAXDS32 0xfe00707f +#define MATCH_KMDA 0x38001077 +#define MASK_KMDA 0xfe00707f +#define MATCH_KMDA32 0x38002077 +#define MASK_KMDA32 0xfe00707f +#define MATCH_KMMAC 0x60001077 +#define MASK_KMMAC 0xfe00707f +#define MATCH_KMMAC_U 0x70001077 +#define MASK_KMMAC_U 0xfe00707f +#define MATCH_KMMAWB 0x46001077 +#define MASK_KMMAWB 0xfe00707f +#define MATCH_KMMAWB2 0xce001077 +#define MASK_KMMAWB2 0xfe00707f +#define MATCH_KMMAWB2_U 0xde001077 +#define MASK_KMMAWB2_U 0xfe00707f +#define MATCH_KMMAWB_U 0x56001077 +#define MASK_KMMAWB_U 0xfe00707f +#define MATCH_KMMAWT 0x66001077 +#define MASK_KMMAWT 0xfe00707f +#define MATCH_KMMAWT2 0xee001077 +#define MASK_KMMAWT2 0xfe00707f +#define MATCH_KMMAWT2_U 0xfe001077 +#define MASK_KMMAWT2_U 0xfe00707f +#define MATCH_KMMAWT_U 0x76001077 +#define MASK_KMMAWT_U 0xfe00707f +#define MATCH_KMMSB 0x42001077 +#define MASK_KMMSB 0xfe00707f +#define MATCH_KMMSB_U 0x52001077 +#define MASK_KMMSB_U 0xfe00707f +#define MATCH_KMMWB2 0x8e001077 +#define MASK_KMMWB2 0xfe00707f +#define MATCH_KMMWB2_U 0x9e001077 +#define MASK_KMMWB2_U 0xfe00707f +#define MATCH_KMMWT2 0xae001077 +#define MASK_KMMWT2 0xfe00707f +#define MATCH_KMMWT2_U 0xbe001077 +#define MASK_KMMWT2_U 0xfe00707f +#define MATCH_KMSDA 0x4c001077 +#define MASK_KMSDA 0xfe00707f +#define MATCH_KMSDA32 0x4c002077 +#define MASK_KMSDA32 0xfe00707f +#define MATCH_KMSR64 0x96001077 +#define MASK_KMSR64 0xfe00707f +#define MATCH_KMSXDA 0x4e001077 +#define MASK_KMSXDA 0xfe00707f +#define MATCH_KMSXDA32 0x4e002077 +#define MASK_KMSXDA32 0xfe00707f +#define MATCH_KMXDA 0x3a001077 +#define MASK_KMXDA 0xfe00707f +#define MATCH_KMXDA32 0x3a002077 +#define MASK_KMXDA32 0xfe00707f +#define MATCH_KSLL16 0x64000077 +#define MASK_KSLL16 0xfe00707f +#define MATCH_KSLL32 0x64002077 +#define MASK_KSLL32 0xfe00707f +#define MATCH_KSLL8 0x6c000077 +#define MASK_KSLL8 0xfe00707f +#define MATCH_KSLLI16 0x75000077 +#define MASK_KSLLI16 0xff00707f +#define MATCH_KSLLI32 0x84002077 +#define MASK_KSLLI32 0xfe00707f +#define MATCH_KSLLI8 0x7c800077 +#define MASK_KSLLI8 0xff80707f +#define MATCH_KSLLIW 0x36001077 +#define MASK_KSLLIW 0xfe00707f +#define MATCH_KSLLW 0x26001077 +#define MASK_KSLLW 0xfe00707f +#define MATCH_KSLRA16 0x56000077 +#define MASK_KSLRA16 0xfe00707f +#define MATCH_KSLRA16_U 0x66000077 +#define MASK_KSLRA16_U 0xfe00707f +#define MATCH_KSLRA32 0x56002077 +#define MASK_KSLRA32 0xfe00707f +#define MATCH_KSLRA32_U 0x66002077 +#define MASK_KSLRA32_U 0xfe00707f +#define MATCH_KSLRA8 0x5e000077 +#define MASK_KSLRA8 0xfe00707f +#define MATCH_KSLRA8_U 0x6e000077 +#define MASK_KSLRA8_U 0xfe00707f +#define MATCH_KSLRAW 0x6e001077 +#define MASK_KSLRAW 0xfe00707f +#define MATCH_KSLRAW_U 0x7e001077 +#define MASK_KSLRAW_U 0xfe00707f +#define MATCH_KSTAS16 0xc4002077 +#define MASK_KSTAS16 0xfe00707f +#define MATCH_KSTAS32 0xc0002077 +#define MASK_KSTAS32 0xfe00707f +#define MATCH_KSTSA16 0xc6002077 +#define MASK_KSTSA16 0xfe00707f +#define MATCH_KSTSA32 0xc2002077 +#define MASK_KSTSA32 0xfe00707f +#define MATCH_KSUB16 0x12000077 +#define MASK_KSUB16 0xfe00707f +#define MATCH_KSUB32 0x12002077 +#define MASK_KSUB32 
0xfe00707f +#define MATCH_KSUB64 0x92001077 +#define MASK_KSUB64 0xfe00707f +#define MATCH_KSUB8 0x1a000077 +#define MASK_KSUB8 0xfe00707f +#define MATCH_KSUBH 0x6001077 +#define MASK_KSUBH 0xfe00707f +#define MATCH_KSUBW 0x2001077 +#define MASK_KSUBW 0xfe00707f +#define MATCH_KWMMUL 0x62001077 +#define MASK_KWMMUL 0xfe00707f +#define MATCH_KWMMUL_U 0x72001077 +#define MASK_KWMMUL_U 0xfe00707f +#define MATCH_LB 0x3 +#define MASK_LB 0x707f +#define MATCH_LBU 0x4003 +#define MASK_LBU 0x707f +#define MATCH_LD 0x3003 +#define MASK_LD 0x707f +#define MATCH_LH 0x1003 +#define MASK_LH 0x707f +#define MATCH_LHU 0x5003 +#define MASK_LHU 0x707f +#define MATCH_LR_D 0x1000302f +#define MASK_LR_D 0xf9f0707f +#define MATCH_LR_W 0x1000202f +#define MASK_LR_W 0xf9f0707f +#define MATCH_LUI 0x37 +#define MASK_LUI 0x7f +#define MATCH_LW 0x2003 +#define MASK_LW 0x707f +#define MATCH_LWU 0x6003 +#define MASK_LWU 0x707f +#define MATCH_MADDR32 0xc4001077 +#define MASK_MADDR32 0xfe00707f +#define MATCH_MAX 0xa006033 +#define MASK_MAX 0xfe00707f +#define MATCH_MAXU 0xa007033 +#define MASK_MAXU 0xfe00707f +#define MATCH_MIN 0xa004033 +#define MASK_MIN 0xfe00707f +#define MATCH_MINU 0xa005033 +#define MASK_MINU 0xfe00707f +#define MATCH_MNRET 0x70200073 +#define MASK_MNRET 0xffffffff +#define MATCH_MRET 0x30200073 +#define MASK_MRET 0xffffffff +#define MATCH_MSUBR32 0xc6001077 +#define MASK_MSUBR32 0xfe00707f +#define MATCH_MUL 0x2000033 +#define MASK_MUL 0xfe00707f +#define MATCH_MULH 0x2001033 +#define MASK_MULH 0xfe00707f +#define MATCH_MULHSU 0x2002033 +#define MASK_MULHSU 0xfe00707f +#define MATCH_MULHU 0x2003033 +#define MASK_MULHU 0xfe00707f +#define MATCH_MULR64 0xf0001077 +#define MASK_MULR64 0xfe00707f +#define MATCH_MULSR64 0xe0001077 +#define MASK_MULSR64 0xfe00707f +#define MATCH_MULW 0x200003b +#define MASK_MULW 0xfe00707f +#define MATCH_OR 0x6033 +#define MASK_OR 0xfe00707f +#define MATCH_ORI 0x6013 +#define MASK_ORI 0x707f +#define MATCH_ORN 0x40006033 +#define MASK_ORN 0xfe00707f +#define MATCH_PACK 0x8004033 +#define MASK_PACK 0xfe00707f +#define MATCH_PACKH 0x8007033 +#define MASK_PACKH 0xfe00707f +#define MATCH_PACKU 0x48004033 +#define MASK_PACKU 0xfe00707f +#define MATCH_PACKUW 0x4800403b +#define MASK_PACKUW 0xfe00707f +#define MATCH_PACKW 0x800403b +#define MASK_PACKW 0xfe00707f +#define MATCH_PAUSE 0x100000f +#define MASK_PAUSE 0xffffffff +#define MATCH_PBSAD 0xfc000077 +#define MASK_PBSAD 0xfe00707f +#define MATCH_PBSADA 0xfe000077 +#define MASK_PBSADA 0xfe00707f +#define MATCH_PKBB16 0xe001077 +#define MASK_PKBB16 0xfe00707f +#define MATCH_PKBT16 0x1e001077 +#define MASK_PKBT16 0xfe00707f +#define MATCH_PKBT32 0x1e002077 +#define MASK_PKBT32 0xfe00707f +#define MATCH_PKTB16 0x3e001077 +#define MASK_PKTB16 0xfe00707f +#define MATCH_PKTB32 0x3e002077 +#define MASK_PKTB32 0xfe00707f +#define MATCH_PKTT16 0x2e001077 +#define MASK_PKTT16 0xfe00707f +#define MATCH_PREFETCH_I 0x6013 +#define MASK_PREFETCH_I 0x1f07fff +#define MATCH_PREFETCH_R 0x106013 +#define MASK_PREFETCH_R 0x1f07fff +#define MATCH_PREFETCH_W 0x306013 +#define MASK_PREFETCH_W 0x1f07fff +#define MATCH_RADD16 0x77 +#define MASK_RADD16 0xfe00707f +#define MATCH_RADD32 0x2077 +#define MASK_RADD32 0xfe00707f +#define MATCH_RADD64 0x80001077 +#define MASK_RADD64 0xfe00707f +#define MATCH_RADD8 0x8000077 +#define MASK_RADD8 0xfe00707f +#define MATCH_RADDW 0x20001077 +#define MASK_RADDW 0xfe00707f +#define MATCH_RCRAS16 0x4000077 +#define MASK_RCRAS16 0xfe00707f +#define MATCH_RCRAS32 0x4002077 +#define MASK_RCRAS32 0xfe00707f +#define 
MATCH_RCRSA16 0x6000077 +#define MASK_RCRSA16 0xfe00707f +#define MATCH_RCRSA32 0x6002077 +#define MASK_RCRSA32 0xfe00707f +#define MATCH_REM 0x2006033 +#define MASK_REM 0xfe00707f +#define MATCH_REMU 0x2007033 +#define MASK_REMU 0xfe00707f +#define MATCH_REMUW 0x200703b +#define MASK_REMUW 0xfe00707f +#define MATCH_REMW 0x200603b +#define MASK_REMW 0xfe00707f +#define MATCH_ROL 0x60001033 +#define MASK_ROL 0xfe00707f +#define MATCH_ROLW 0x6000103b +#define MASK_ROLW 0xfe00707f +#define MATCH_ROR 0x60005033 +#define MASK_ROR 0xfe00707f +#define MATCH_RORI 0x60005013 +#define MASK_RORI 0xfc00707f +#define MATCH_RORIW 0x6000501b +#define MASK_RORIW 0xfe00707f +#define MATCH_RORW 0x6000503b +#define MASK_RORW 0xfe00707f +#define MATCH_RSTAS16 0xb4002077 +#define MASK_RSTAS16 0xfe00707f +#define MATCH_RSTAS32 0xb0002077 +#define MASK_RSTAS32 0xfe00707f +#define MATCH_RSTSA16 0xb6002077 +#define MASK_RSTSA16 0xfe00707f +#define MATCH_RSTSA32 0xb2002077 +#define MASK_RSTSA32 0xfe00707f +#define MATCH_RSUB16 0x2000077 +#define MASK_RSUB16 0xfe00707f +#define MATCH_RSUB32 0x2002077 +#define MASK_RSUB32 0xfe00707f +#define MATCH_RSUB64 0x82001077 +#define MASK_RSUB64 0xfe00707f +#define MATCH_RSUB8 0xa000077 +#define MASK_RSUB8 0xfe00707f +#define MATCH_RSUBW 0x22001077 +#define MASK_RSUBW 0xfe00707f +#define MATCH_SB 0x23 +#define MASK_SB 0x707f +#define MATCH_SC_D 0x1800302f +#define MASK_SC_D 0xf800707f +#define MATCH_SC_W 0x1800202f +#define MASK_SC_W 0xf800707f +#define MATCH_SCLIP16 0x84000077 +#define MASK_SCLIP16 0xff00707f +#define MATCH_SCLIP32 0xe4000077 +#define MASK_SCLIP32 0xfe00707f +#define MATCH_SCLIP8 0x8c000077 +#define MASK_SCLIP8 0xff80707f +#define MATCH_SCMPLE16 0x1c000077 +#define MASK_SCMPLE16 0xfe00707f +#define MATCH_SCMPLE8 0x1e000077 +#define MASK_SCMPLE8 0xfe00707f +#define MATCH_SCMPLT16 0xc000077 +#define MASK_SCMPLT16 0xfe00707f +#define MATCH_SCMPLT8 0xe000077 +#define MASK_SCMPLT8 0xfe00707f +#define MATCH_SD 0x3023 +#define MASK_SD 0x707f +#define MATCH_SEXT_B 0x60401013 +#define MASK_SEXT_B 0xfff0707f +#define MATCH_SEXT_H 0x60501013 +#define MASK_SEXT_H 0xfff0707f +#define MATCH_SFENCE_INVAL_IR 0x18100073 +#define MASK_SFENCE_INVAL_IR 0xffffffff +#define MATCH_SFENCE_VMA 0x12000073 +#define MASK_SFENCE_VMA 0xfe007fff +#define MATCH_SFENCE_W_INVAL 0x18000073 +#define MASK_SFENCE_W_INVAL 0xffffffff +#define MATCH_SH 0x1023 +#define MASK_SH 0x707f +#define MATCH_SH1ADD 0x20002033 +#define MASK_SH1ADD 0xfe00707f +#define MATCH_SH1ADD_UW 0x2000203b +#define MASK_SH1ADD_UW 0xfe00707f +#define MATCH_SH2ADD 0x20004033 +#define MASK_SH2ADD 0xfe00707f +#define MATCH_SH2ADD_UW 0x2000403b +#define MASK_SH2ADD_UW 0xfe00707f +#define MATCH_SH3ADD 0x20006033 +#define MASK_SH3ADD 0xfe00707f +#define MATCH_SH3ADD_UW 0x2000603b +#define MASK_SH3ADD_UW 0xfe00707f +#define MATCH_SHA256SIG0 0x10201013 +#define MASK_SHA256SIG0 0xfff0707f +#define MATCH_SHA256SIG1 0x10301013 +#define MASK_SHA256SIG1 0xfff0707f +#define MATCH_SHA256SUM0 0x10001013 +#define MASK_SHA256SUM0 0xfff0707f +#define MATCH_SHA256SUM1 0x10101013 +#define MASK_SHA256SUM1 0xfff0707f +#define MATCH_SHA512SIG0 0x10601013 +#define MASK_SHA512SIG0 0xfff0707f +#define MATCH_SHA512SIG0H 0x5c000033 +#define MASK_SHA512SIG0H 0xfe00707f +#define MATCH_SHA512SIG0L 0x54000033 +#define MASK_SHA512SIG0L 0xfe00707f +#define MATCH_SHA512SIG1 0x10701013 +#define MASK_SHA512SIG1 0xfff0707f +#define MATCH_SHA512SIG1H 0x5e000033 +#define MASK_SHA512SIG1H 0xfe00707f +#define MATCH_SHA512SIG1L 0x56000033 +#define MASK_SHA512SIG1L 
0xfe00707f +#define MATCH_SHA512SUM0 0x10401013 +#define MASK_SHA512SUM0 0xfff0707f +#define MATCH_SHA512SUM0R 0x50000033 +#define MASK_SHA512SUM0R 0xfe00707f +#define MATCH_SHA512SUM1 0x10501013 +#define MASK_SHA512SUM1 0xfff0707f +#define MATCH_SHA512SUM1R 0x52000033 +#define MASK_SHA512SUM1R 0xfe00707f +#define MATCH_SHFL 0x8001033 +#define MASK_SHFL 0xfe00707f +#define MATCH_SHFLI 0x8001013 +#define MASK_SHFLI 0xfe00707f +#define MATCH_SHFLW 0x800103b +#define MASK_SHFLW 0xfe00707f +#define MATCH_SINVAL_VMA 0x16000073 +#define MASK_SINVAL_VMA 0xfe007fff +#define MATCH_SLL 0x1033 +#define MASK_SLL 0xfe00707f +#define MATCH_SLL16 0x54000077 +#define MASK_SLL16 0xfe00707f +#define MATCH_SLL32 0x54002077 +#define MASK_SLL32 0xfe00707f +#define MATCH_SLL8 0x5c000077 +#define MASK_SLL8 0xfe00707f +#define MATCH_SLLI 0x1013 +#define MASK_SLLI 0xfc00707f +#define MATCH_SLLI16 0x74000077 +#define MASK_SLLI16 0xff00707f +#define MATCH_SLLI32 0x74002077 +#define MASK_SLLI32 0xfe00707f +#define MATCH_SLLI8 0x7c000077 +#define MASK_SLLI8 0xff80707f +#define MATCH_SLLI_RV32 0x1013 +#define MASK_SLLI_RV32 0xfe00707f +#define MATCH_SLLI_UW 0x800101b +#define MASK_SLLI_UW 0xfc00707f +#define MATCH_SLLIW 0x101b +#define MASK_SLLIW 0xfe00707f +#define MATCH_SLLW 0x103b +#define MASK_SLLW 0xfe00707f +#define MATCH_SLO 0x20001033 +#define MASK_SLO 0xfe00707f +#define MATCH_SLOI 0x20001013 +#define MASK_SLOI 0xfc00707f +#define MATCH_SLOIW 0x2000101b +#define MASK_SLOIW 0xfe00707f +#define MATCH_SLOW 0x2000103b +#define MASK_SLOW 0xfe00707f +#define MATCH_SLT 0x2033 +#define MASK_SLT 0xfe00707f +#define MATCH_SLTI 0x2013 +#define MASK_SLTI 0x707f +#define MATCH_SLTIU 0x3013 +#define MASK_SLTIU 0x707f +#define MATCH_SLTU 0x3033 +#define MASK_SLTU 0xfe00707f +#define MATCH_SM3P0 0x10801013 +#define MASK_SM3P0 0xfff0707f +#define MATCH_SM3P1 0x10901013 +#define MASK_SM3P1 0xfff0707f +#define MATCH_SM4ED 0x30000033 +#define MASK_SM4ED 0x3e00707f +#define MATCH_SM4KS 0x34000033 +#define MASK_SM4KS 0x3e00707f +#define MATCH_SMAL 0x5e001077 +#define MASK_SMAL 0xfe00707f +#define MATCH_SMALBB 0x88001077 +#define MASK_SMALBB 0xfe00707f +#define MATCH_SMALBT 0x98001077 +#define MASK_SMALBT 0xfe00707f +#define MATCH_SMALDA 0x8c001077 +#define MASK_SMALDA 0xfe00707f +#define MATCH_SMALDRS 0x9a001077 +#define MASK_SMALDRS 0xfe00707f +#define MATCH_SMALDS 0x8a001077 +#define MASK_SMALDS 0xfe00707f +#define MATCH_SMALTT 0xa8001077 +#define MASK_SMALTT 0xfe00707f +#define MATCH_SMALXDA 0x9c001077 +#define MASK_SMALXDA 0xfe00707f +#define MATCH_SMALXDS 0xaa001077 +#define MASK_SMALXDS 0xfe00707f +#define MATCH_SMAQA 0xc8000077 +#define MASK_SMAQA 0xfe00707f +#define MATCH_SMAQA_SU 0xca000077 +#define MASK_SMAQA_SU 0xfe00707f +#define MATCH_SMAR64 0x84001077 +#define MASK_SMAR64 0xfe00707f +#define MATCH_SMAX16 0x82000077 +#define MASK_SMAX16 0xfe00707f +#define MATCH_SMAX32 0x92002077 +#define MASK_SMAX32 0xfe00707f +#define MATCH_SMAX8 0x8a000077 +#define MASK_SMAX8 0xfe00707f +#define MATCH_SMBB16 0x8001077 +#define MASK_SMBB16 0xfe00707f +#define MATCH_SMBT16 0x18001077 +#define MASK_SMBT16 0xfe00707f +#define MATCH_SMBT32 0x18002077 +#define MASK_SMBT32 0xfe00707f +#define MATCH_SMDRS 0x68001077 +#define MASK_SMDRS 0xfe00707f +#define MATCH_SMDRS32 0x68002077 +#define MASK_SMDRS32 0xfe00707f +#define MATCH_SMDS 0x58001077 +#define MASK_SMDS 0xfe00707f +#define MATCH_SMDS32 0x58002077 +#define MASK_SMDS32 0xfe00707f +#define MATCH_SMIN16 0x80000077 +#define MASK_SMIN16 0xfe00707f +#define MATCH_SMIN32 0x90002077 +#define 
MASK_SMIN32 0xfe00707f +#define MATCH_SMIN8 0x88000077 +#define MASK_SMIN8 0xfe00707f +#define MATCH_SMMUL 0x40001077 +#define MASK_SMMUL 0xfe00707f +#define MATCH_SMMUL_U 0x50001077 +#define MASK_SMMUL_U 0xfe00707f +#define MATCH_SMMWB 0x44001077 +#define MASK_SMMWB 0xfe00707f +#define MATCH_SMMWB_U 0x54001077 +#define MASK_SMMWB_U 0xfe00707f +#define MATCH_SMMWT 0x64001077 +#define MASK_SMMWT 0xfe00707f +#define MATCH_SMMWT_U 0x74001077 +#define MASK_SMMWT_U 0xfe00707f +#define MATCH_SMSLDA 0xac001077 +#define MASK_SMSLDA 0xfe00707f +#define MATCH_SMSLXDA 0xbc001077 +#define MASK_SMSLXDA 0xfe00707f +#define MATCH_SMSR64 0x86001077 +#define MASK_SMSR64 0xfe00707f +#define MATCH_SMTT16 0x28001077 +#define MASK_SMTT16 0xfe00707f +#define MATCH_SMTT32 0x28002077 +#define MASK_SMTT32 0xfe00707f +#define MATCH_SMUL16 0xa0000077 +#define MASK_SMUL16 0xfe00707f +#define MATCH_SMUL8 0xa8000077 +#define MASK_SMUL8 0xfe00707f +#define MATCH_SMULX16 0xa2000077 +#define MASK_SMULX16 0xfe00707f +#define MATCH_SMULX8 0xaa000077 +#define MASK_SMULX8 0xfe00707f +#define MATCH_SMXDS 0x78001077 +#define MASK_SMXDS 0xfe00707f +#define MATCH_SMXDS32 0x78002077 +#define MASK_SMXDS32 0xfe00707f +#define MATCH_SRA 0x40005033 +#define MASK_SRA 0xfe00707f +#define MATCH_SRA16 0x50000077 +#define MASK_SRA16 0xfe00707f +#define MATCH_SRA16_U 0x60000077 +#define MASK_SRA16_U 0xfe00707f +#define MATCH_SRA32 0x50002077 +#define MASK_SRA32 0xfe00707f +#define MATCH_SRA32_U 0x60002077 +#define MASK_SRA32_U 0xfe00707f +#define MATCH_SRA8 0x58000077 +#define MASK_SRA8 0xfe00707f +#define MATCH_SRA8_U 0x68000077 +#define MASK_SRA8_U 0xfe00707f +#define MATCH_SRA_U 0x24001077 +#define MASK_SRA_U 0xfe00707f +#define MATCH_SRAI 0x40005013 +#define MASK_SRAI 0xfc00707f +#define MATCH_SRAI16 0x70000077 +#define MASK_SRAI16 0xff00707f +#define MATCH_SRAI16_U 0x71000077 +#define MASK_SRAI16_U 0xff00707f +#define MATCH_SRAI32 0x70002077 +#define MASK_SRAI32 0xfe00707f +#define MATCH_SRAI32_U 0x80002077 +#define MASK_SRAI32_U 0xfe00707f +#define MATCH_SRAI8 0x78000077 +#define MASK_SRAI8 0xff80707f +#define MATCH_SRAI8_U 0x78800077 +#define MASK_SRAI8_U 0xff80707f +#define MATCH_SRAI_RV32 0x40005013 +#define MASK_SRAI_RV32 0xfe00707f +#define MATCH_SRAI_U 0xd4001077 +#define MASK_SRAI_U 0xfc00707f +#define MATCH_SRAIW 0x4000501b +#define MASK_SRAIW 0xfe00707f +#define MATCH_SRAIW_U 0x34001077 +#define MASK_SRAIW_U 0xfe00707f +#define MATCH_SRAW 0x4000503b +#define MASK_SRAW 0xfe00707f +#define MATCH_SRET 0x10200073 +#define MASK_SRET 0xffffffff +#define MATCH_SRL 0x5033 +#define MASK_SRL 0xfe00707f +#define MATCH_SRL16 0x52000077 +#define MASK_SRL16 0xfe00707f +#define MATCH_SRL16_U 0x62000077 +#define MASK_SRL16_U 0xfe00707f +#define MATCH_SRL32 0x52002077 +#define MASK_SRL32 0xfe00707f +#define MATCH_SRL32_U 0x62002077 +#define MASK_SRL32_U 0xfe00707f +#define MATCH_SRL8 0x5a000077 +#define MASK_SRL8 0xfe00707f +#define MATCH_SRL8_U 0x6a000077 +#define MASK_SRL8_U 0xfe00707f +#define MATCH_SRLI 0x5013 +#define MASK_SRLI 0xfc00707f +#define MATCH_SRLI16 0x72000077 +#define MASK_SRLI16 0xff00707f +#define MATCH_SRLI16_U 0x73000077 +#define MASK_SRLI16_U 0xff00707f +#define MATCH_SRLI32 0x72002077 +#define MASK_SRLI32 0xfe00707f +#define MATCH_SRLI32_U 0x82002077 +#define MASK_SRLI32_U 0xfe00707f +#define MATCH_SRLI8 0x7a000077 +#define MASK_SRLI8 0xff80707f +#define MATCH_SRLI8_U 0x7a800077 +#define MASK_SRLI8_U 0xff80707f +#define MATCH_SRLI_RV32 0x5013 +#define MASK_SRLI_RV32 0xfe00707f +#define MATCH_SRLIW 0x501b +#define 
MASK_SRLIW 0xfe00707f +#define MATCH_SRLW 0x503b +#define MASK_SRLW 0xfe00707f +#define MATCH_SRO 0x20005033 +#define MASK_SRO 0xfe00707f +#define MATCH_SROI 0x20005013 +#define MASK_SROI 0xfc00707f +#define MATCH_SROIW 0x2000501b +#define MASK_SROIW 0xfe00707f +#define MATCH_SROW 0x2000503b +#define MASK_SROW 0xfe00707f +#define MATCH_STAS16 0xf4002077 +#define MASK_STAS16 0xfe00707f +#define MATCH_STAS32 0xf0002077 +#define MASK_STAS32 0xfe00707f +#define MATCH_STSA16 0xf6002077 +#define MASK_STSA16 0xfe00707f +#define MATCH_STSA32 0xf2002077 +#define MASK_STSA32 0xfe00707f +#define MATCH_SUB 0x40000033 +#define MASK_SUB 0xfe00707f +#define MATCH_SUB16 0x42000077 +#define MASK_SUB16 0xfe00707f +#define MATCH_SUB32 0x42002077 +#define MASK_SUB32 0xfe00707f +#define MATCH_SUB64 0xc2001077 +#define MASK_SUB64 0xfe00707f +#define MATCH_SUB8 0x4a000077 +#define MASK_SUB8 0xfe00707f +#define MATCH_SUBW 0x4000003b +#define MASK_SUBW 0xfe00707f +#define MATCH_SUNPKD810 0xac800077 +#define MASK_SUNPKD810 0xfff0707f +#define MATCH_SUNPKD820 0xac900077 +#define MASK_SUNPKD820 0xfff0707f +#define MATCH_SUNPKD830 0xaca00077 +#define MASK_SUNPKD830 0xfff0707f +#define MATCH_SUNPKD831 0xacb00077 +#define MASK_SUNPKD831 0xfff0707f +#define MATCH_SUNPKD832 0xad300077 +#define MASK_SUNPKD832 0xfff0707f +#define MATCH_SW 0x2023 +#define MASK_SW 0x707f +#define MATCH_UCLIP16 0x85000077 +#define MASK_UCLIP16 0xff00707f +#define MATCH_UCLIP32 0xf4000077 +#define MASK_UCLIP32 0xfe00707f +#define MATCH_UCLIP8 0x8d000077 +#define MASK_UCLIP8 0xff80707f +#define MATCH_UCMPLE16 0x3c000077 +#define MASK_UCMPLE16 0xfe00707f +#define MATCH_UCMPLE8 0x3e000077 +#define MASK_UCMPLE8 0xfe00707f +#define MATCH_UCMPLT16 0x2c000077 +#define MASK_UCMPLT16 0xfe00707f +#define MATCH_UCMPLT8 0x2e000077 +#define MASK_UCMPLT8 0xfe00707f +#define MATCH_UKADD16 0x30000077 +#define MASK_UKADD16 0xfe00707f +#define MATCH_UKADD32 0x30002077 +#define MASK_UKADD32 0xfe00707f +#define MATCH_UKADD64 0xb0001077 +#define MASK_UKADD64 0xfe00707f +#define MATCH_UKADD8 0x38000077 +#define MASK_UKADD8 0xfe00707f +#define MATCH_UKADDH 0x14001077 +#define MASK_UKADDH 0xfe00707f +#define MATCH_UKADDW 0x10001077 +#define MASK_UKADDW 0xfe00707f +#define MATCH_UKCRAS16 0x34000077 +#define MASK_UKCRAS16 0xfe00707f +#define MATCH_UKCRAS32 0x34002077 +#define MASK_UKCRAS32 0xfe00707f +#define MATCH_UKCRSA16 0x36000077 +#define MASK_UKCRSA16 0xfe00707f +#define MATCH_UKCRSA32 0x36002077 +#define MASK_UKCRSA32 0xfe00707f +#define MATCH_UKMAR64 0xb4001077 +#define MASK_UKMAR64 0xfe00707f +#define MATCH_UKMSR64 0xb6001077 +#define MASK_UKMSR64 0xfe00707f +#define MATCH_UKSTAS16 0xe4002077 +#define MASK_UKSTAS16 0xfe00707f +#define MATCH_UKSTAS32 0xe0002077 +#define MASK_UKSTAS32 0xfe00707f +#define MATCH_UKSTSA16 0xe6002077 +#define MASK_UKSTSA16 0xfe00707f +#define MATCH_UKSTSA32 0xe2002077 +#define MASK_UKSTSA32 0xfe00707f +#define MATCH_UKSUB16 0x32000077 +#define MASK_UKSUB16 0xfe00707f +#define MATCH_UKSUB32 0x32002077 +#define MASK_UKSUB32 0xfe00707f +#define MATCH_UKSUB64 0xb2001077 +#define MASK_UKSUB64 0xfe00707f +#define MATCH_UKSUB8 0x3a000077 +#define MASK_UKSUB8 0xfe00707f +#define MATCH_UKSUBH 0x16001077 +#define MASK_UKSUBH 0xfe00707f +#define MATCH_UKSUBW 0x12001077 +#define MASK_UKSUBW 0xfe00707f +#define MATCH_UMAQA 0xcc000077 +#define MASK_UMAQA 0xfe00707f +#define MATCH_UMAR64 0xa4001077 +#define MASK_UMAR64 0xfe00707f +#define MATCH_UMAX16 0x92000077 +#define MASK_UMAX16 0xfe00707f +#define MATCH_UMAX32 0xa2002077 +#define MASK_UMAX32 
0xfe00707f +#define MATCH_UMAX8 0x9a000077 +#define MASK_UMAX8 0xfe00707f +#define MATCH_UMIN16 0x90000077 +#define MASK_UMIN16 0xfe00707f +#define MATCH_UMIN32 0xa0002077 +#define MASK_UMIN32 0xfe00707f +#define MATCH_UMIN8 0x98000077 +#define MASK_UMIN8 0xfe00707f +#define MATCH_UMSR64 0xa6001077 +#define MASK_UMSR64 0xfe00707f +#define MATCH_UMUL16 0xb0000077 +#define MASK_UMUL16 0xfe00707f +#define MATCH_UMUL8 0xb8000077 +#define MASK_UMUL8 0xfe00707f +#define MATCH_UMULX16 0xb2000077 +#define MASK_UMULX16 0xfe00707f +#define MATCH_UMULX8 0xba000077 +#define MASK_UMULX8 0xfe00707f +#define MATCH_UNSHFL 0x8005033 +#define MASK_UNSHFL 0xfe00707f +#define MATCH_UNSHFLI 0x8005013 +#define MASK_UNSHFLI 0xfe00707f +#define MATCH_UNSHFLW 0x800503b +#define MASK_UNSHFLW 0xfe00707f +#define MATCH_URADD16 0x20000077 +#define MASK_URADD16 0xfe00707f +#define MATCH_URADD32 0x20002077 +#define MASK_URADD32 0xfe00707f +#define MATCH_URADD64 0xa0001077 +#define MASK_URADD64 0xfe00707f +#define MATCH_URADD8 0x28000077 +#define MASK_URADD8 0xfe00707f +#define MATCH_URADDW 0x30001077 +#define MASK_URADDW 0xfe00707f +#define MATCH_URCRAS16 0x24000077 +#define MASK_URCRAS16 0xfe00707f +#define MATCH_URCRAS32 0x24002077 +#define MASK_URCRAS32 0xfe00707f +#define MATCH_URCRSA16 0x26000077 +#define MASK_URCRSA16 0xfe00707f +#define MATCH_URCRSA32 0x26002077 +#define MASK_URCRSA32 0xfe00707f +#define MATCH_URSTAS16 0xd4002077 +#define MASK_URSTAS16 0xfe00707f +#define MATCH_URSTAS32 0xd0002077 +#define MASK_URSTAS32 0xfe00707f +#define MATCH_URSTSA16 0xd6002077 +#define MASK_URSTSA16 0xfe00707f +#define MATCH_URSTSA32 0xd2002077 +#define MASK_URSTSA32 0xfe00707f +#define MATCH_URSUB16 0x22000077 +#define MASK_URSUB16 0xfe00707f +#define MATCH_URSUB32 0x22002077 +#define MASK_URSUB32 0xfe00707f +#define MATCH_URSUB64 0xa2001077 +#define MASK_URSUB64 0xfe00707f +#define MATCH_URSUB8 0x2a000077 +#define MASK_URSUB8 0xfe00707f +#define MATCH_URSUBW 0x32001077 +#define MASK_URSUBW 0xfe00707f +#define MATCH_VAADD_VV 0x24002057 +#define MASK_VAADD_VV 0xfc00707f +#define MATCH_VAADD_VX 0x24006057 +#define MASK_VAADD_VX 0xfc00707f +#define MATCH_VAADDU_VV 0x20002057 +#define MASK_VAADDU_VV 0xfc00707f +#define MATCH_VAADDU_VX 0x20006057 +#define MASK_VAADDU_VX 0xfc00707f +#define MATCH_VADC_VIM 0x40003057 +#define MASK_VADC_VIM 0xfe00707f +#define MATCH_VADC_VVM 0x40000057 +#define MASK_VADC_VVM 0xfe00707f +#define MATCH_VADC_VXM 0x40004057 +#define MASK_VADC_VXM 0xfe00707f +#define MATCH_VADD_VI 0x3057 +#define MASK_VADD_VI 0xfc00707f +#define MATCH_VADD_VV 0x57 +#define MASK_VADD_VV 0xfc00707f +#define MATCH_VADD_VX 0x4057 +#define MASK_VADD_VX 0xfc00707f +#define MATCH_VAMOADDEI16_V 0x502f +#define MASK_VAMOADDEI16_V 0xf800707f +#define MATCH_VAMOADDEI32_V 0x602f +#define MASK_VAMOADDEI32_V 0xf800707f +#define MATCH_VAMOADDEI64_V 0x702f +#define MASK_VAMOADDEI64_V 0xf800707f +#define MATCH_VAMOADDEI8_V 0x2f +#define MASK_VAMOADDEI8_V 0xf800707f +#define MATCH_VAMOANDEI16_V 0x6000502f +#define MASK_VAMOANDEI16_V 0xf800707f +#define MATCH_VAMOANDEI32_V 0x6000602f +#define MASK_VAMOANDEI32_V 0xf800707f +#define MATCH_VAMOANDEI64_V 0x6000702f +#define MASK_VAMOANDEI64_V 0xf800707f +#define MATCH_VAMOANDEI8_V 0x6000002f +#define MASK_VAMOANDEI8_V 0xf800707f +#define MATCH_VAMOMAXEI16_V 0xa000502f +#define MASK_VAMOMAXEI16_V 0xf800707f +#define MATCH_VAMOMAXEI32_V 0xa000602f +#define MASK_VAMOMAXEI32_V 0xf800707f +#define MATCH_VAMOMAXEI64_V 0xa000702f +#define MASK_VAMOMAXEI64_V 0xf800707f +#define MATCH_VAMOMAXEI8_V 
0xa000002f +#define MASK_VAMOMAXEI8_V 0xf800707f +#define MATCH_VAMOMAXUEI16_V 0xe000502f +#define MASK_VAMOMAXUEI16_V 0xf800707f +#define MATCH_VAMOMAXUEI32_V 0xe000602f +#define MASK_VAMOMAXUEI32_V 0xf800707f +#define MATCH_VAMOMAXUEI64_V 0xe000702f +#define MASK_VAMOMAXUEI64_V 0xf800707f +#define MATCH_VAMOMAXUEI8_V 0xe000002f +#define MASK_VAMOMAXUEI8_V 0xf800707f +#define MATCH_VAMOMINEI16_V 0x8000502f +#define MASK_VAMOMINEI16_V 0xf800707f +#define MATCH_VAMOMINEI32_V 0x8000602f +#define MASK_VAMOMINEI32_V 0xf800707f +#define MATCH_VAMOMINEI64_V 0x8000702f +#define MASK_VAMOMINEI64_V 0xf800707f +#define MATCH_VAMOMINEI8_V 0x8000002f +#define MASK_VAMOMINEI8_V 0xf800707f +#define MATCH_VAMOMINUEI16_V 0xc000502f +#define MASK_VAMOMINUEI16_V 0xf800707f +#define MATCH_VAMOMINUEI32_V 0xc000602f +#define MASK_VAMOMINUEI32_V 0xf800707f +#define MATCH_VAMOMINUEI64_V 0xc000702f +#define MASK_VAMOMINUEI64_V 0xf800707f +#define MATCH_VAMOMINUEI8_V 0xc000002f +#define MASK_VAMOMINUEI8_V 0xf800707f +#define MATCH_VAMOOREI16_V 0x4000502f +#define MASK_VAMOOREI16_V 0xf800707f +#define MATCH_VAMOOREI32_V 0x4000602f +#define MASK_VAMOOREI32_V 0xf800707f +#define MATCH_VAMOOREI64_V 0x4000702f +#define MASK_VAMOOREI64_V 0xf800707f +#define MATCH_VAMOOREI8_V 0x4000002f +#define MASK_VAMOOREI8_V 0xf800707f +#define MATCH_VAMOSWAPEI16_V 0x800502f +#define MASK_VAMOSWAPEI16_V 0xf800707f +#define MATCH_VAMOSWAPEI32_V 0x800602f +#define MASK_VAMOSWAPEI32_V 0xf800707f +#define MATCH_VAMOSWAPEI64_V 0x800702f +#define MASK_VAMOSWAPEI64_V 0xf800707f +#define MATCH_VAMOSWAPEI8_V 0x800002f +#define MASK_VAMOSWAPEI8_V 0xf800707f +#define MATCH_VAMOXOREI16_V 0x2000502f +#define MASK_VAMOXOREI16_V 0xf800707f +#define MATCH_VAMOXOREI32_V 0x2000602f +#define MASK_VAMOXOREI32_V 0xf800707f +#define MATCH_VAMOXOREI64_V 0x2000702f +#define MASK_VAMOXOREI64_V 0xf800707f +#define MATCH_VAMOXOREI8_V 0x2000002f +#define MASK_VAMOXOREI8_V 0xf800707f +#define MATCH_VAND_VI 0x24003057 +#define MASK_VAND_VI 0xfc00707f +#define MATCH_VAND_VV 0x24000057 +#define MASK_VAND_VV 0xfc00707f +#define MATCH_VAND_VX 0x24004057 +#define MASK_VAND_VX 0xfc00707f +#define MATCH_VASUB_VV 0x2c002057 +#define MASK_VASUB_VV 0xfc00707f +#define MATCH_VASUB_VX 0x2c006057 +#define MASK_VASUB_VX 0xfc00707f +#define MATCH_VASUBU_VV 0x28002057 +#define MASK_VASUBU_VV 0xfc00707f +#define MATCH_VASUBU_VX 0x28006057 +#define MASK_VASUBU_VX 0xfc00707f +#define MATCH_VCOMPRESS_VM 0x5e002057 +#define MASK_VCOMPRESS_VM 0xfe00707f +#define MATCH_VCPOP_M 0x40082057 +#define MASK_VCPOP_M 0xfc0ff07f +#define MATCH_VDIV_VV 0x84002057 +#define MASK_VDIV_VV 0xfc00707f +#define MATCH_VDIV_VX 0x84006057 +#define MASK_VDIV_VX 0xfc00707f +#define MATCH_VDIVU_VV 0x80002057 +#define MASK_VDIVU_VV 0xfc00707f +#define MATCH_VDIVU_VX 0x80006057 +#define MASK_VDIVU_VX 0xfc00707f +#define MATCH_VFADD_VF 0x5057 +#define MASK_VFADD_VF 0xfc00707f +#define MATCH_VFADD_VV 0x1057 +#define MASK_VFADD_VV 0xfc00707f +#define MATCH_VFCLASS_V 0x4c081057 +#define MASK_VFCLASS_V 0xfc0ff07f +#define MATCH_VFCVT_F_X_V 0x48019057 +#define MASK_VFCVT_F_X_V 0xfc0ff07f +#define MATCH_VFCVT_F_XU_V 0x48011057 +#define MASK_VFCVT_F_XU_V 0xfc0ff07f +#define MATCH_VFCVT_RTZ_X_F_V 0x48039057 +#define MASK_VFCVT_RTZ_X_F_V 0xfc0ff07f +#define MATCH_VFCVT_RTZ_XU_F_V 0x48031057 +#define MASK_VFCVT_RTZ_XU_F_V 0xfc0ff07f +#define MATCH_VFCVT_X_F_V 0x48009057 +#define MASK_VFCVT_X_F_V 0xfc0ff07f +#define MATCH_VFCVT_XU_F_V 0x48001057 +#define MASK_VFCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFDIV_VF 0x80005057 
+#define MASK_VFDIV_VF 0xfc00707f +#define MATCH_VFDIV_VV 0x80001057 +#define MASK_VFDIV_VV 0xfc00707f +#define MATCH_VFIRST_M 0x4008a057 +#define MASK_VFIRST_M 0xfc0ff07f +#define MATCH_VFMACC_VF 0xb0005057 +#define MASK_VFMACC_VF 0xfc00707f +#define MATCH_VFMACC_VV 0xb0001057 +#define MASK_VFMACC_VV 0xfc00707f +#define MATCH_VFMADD_VF 0xa0005057 +#define MASK_VFMADD_VF 0xfc00707f +#define MATCH_VFMADD_VV 0xa0001057 +#define MASK_VFMADD_VV 0xfc00707f +#define MATCH_VFMAX_VF 0x18005057 +#define MASK_VFMAX_VF 0xfc00707f +#define MATCH_VFMAX_VV 0x18001057 +#define MASK_VFMAX_VV 0xfc00707f +#define MATCH_VFMERGE_VFM 0x5c005057 +#define MASK_VFMERGE_VFM 0xfe00707f +#define MATCH_VFMIN_VF 0x10005057 +#define MASK_VFMIN_VF 0xfc00707f +#define MATCH_VFMIN_VV 0x10001057 +#define MASK_VFMIN_VV 0xfc00707f +#define MATCH_VFMSAC_VF 0xb8005057 +#define MASK_VFMSAC_VF 0xfc00707f +#define MATCH_VFMSAC_VV 0xb8001057 +#define MASK_VFMSAC_VV 0xfc00707f +#define MATCH_VFMSUB_VF 0xa8005057 +#define MASK_VFMSUB_VF 0xfc00707f +#define MATCH_VFMSUB_VV 0xa8001057 +#define MASK_VFMSUB_VV 0xfc00707f +#define MATCH_VFMUL_VF 0x90005057 +#define MASK_VFMUL_VF 0xfc00707f +#define MATCH_VFMUL_VV 0x90001057 +#define MASK_VFMUL_VV 0xfc00707f +#define MATCH_VFMV_F_S 0x42001057 +#define MASK_VFMV_F_S 0xfe0ff07f +#define MATCH_VFMV_S_F 0x42005057 +#define MASK_VFMV_S_F 0xfff0707f +#define MATCH_VFMV_V_F 0x5e005057 +#define MASK_VFMV_V_F 0xfff0707f +#define MATCH_VFNCVT_F_F_W 0x480a1057 +#define MASK_VFNCVT_F_F_W 0xfc0ff07f +#define MATCH_VFNCVT_F_X_W 0x48099057 +#define MASK_VFNCVT_F_X_W 0xfc0ff07f +#define MATCH_VFNCVT_F_XU_W 0x48091057 +#define MASK_VFNCVT_F_XU_W 0xfc0ff07f +#define MATCH_VFNCVT_ROD_F_F_W 0x480a9057 +#define MASK_VFNCVT_ROD_F_F_W 0xfc0ff07f +#define MATCH_VFNCVT_RTZ_X_F_W 0x480b9057 +#define MASK_VFNCVT_RTZ_X_F_W 0xfc0ff07f +#define MATCH_VFNCVT_RTZ_XU_F_W 0x480b1057 +#define MASK_VFNCVT_RTZ_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVT_X_F_W 0x48089057 +#define MASK_VFNCVT_X_F_W 0xfc0ff07f +#define MATCH_VFNCVT_XU_F_W 0x48081057 +#define MASK_VFNCVT_XU_F_W 0xfc0ff07f +#define MATCH_VFNMACC_VF 0xb4005057 +#define MASK_VFNMACC_VF 0xfc00707f +#define MATCH_VFNMACC_VV 0xb4001057 +#define MASK_VFNMACC_VV 0xfc00707f +#define MATCH_VFNMADD_VF 0xa4005057 +#define MASK_VFNMADD_VF 0xfc00707f +#define MATCH_VFNMADD_VV 0xa4001057 +#define MASK_VFNMADD_VV 0xfc00707f +#define MATCH_VFNMSAC_VF 0xbc005057 +#define MASK_VFNMSAC_VF 0xfc00707f +#define MATCH_VFNMSAC_VV 0xbc001057 +#define MASK_VFNMSAC_VV 0xfc00707f +#define MATCH_VFNMSUB_VF 0xac005057 +#define MASK_VFNMSUB_VF 0xfc00707f +#define MATCH_VFNMSUB_VV 0xac001057 +#define MASK_VFNMSUB_VV 0xfc00707f +#define MATCH_VFRDIV_VF 0x84005057 +#define MASK_VFRDIV_VF 0xfc00707f +#define MATCH_VFREC7_V 0x4c029057 +#define MASK_VFREC7_V 0xfc0ff07f +#define MATCH_VFREDMAX_VS 0x1c001057 +#define MASK_VFREDMAX_VS 0xfc00707f +#define MATCH_VFREDMIN_VS 0x14001057 +#define MASK_VFREDMIN_VS 0xfc00707f +#define MATCH_VFREDOSUM_VS 0xc001057 +#define MASK_VFREDOSUM_VS 0xfc00707f +#define MATCH_VFREDUSUM_VS 0x4001057 +#define MASK_VFREDUSUM_VS 0xfc00707f +#define MATCH_VFRSQRT7_V 0x4c021057 +#define MASK_VFRSQRT7_V 0xfc0ff07f +#define MATCH_VFRSUB_VF 0x9c005057 +#define MASK_VFRSUB_VF 0xfc00707f +#define MATCH_VFSGNJ_VF 0x20005057 +#define MASK_VFSGNJ_VF 0xfc00707f +#define MATCH_VFSGNJ_VV 0x20001057 +#define MASK_VFSGNJ_VV 0xfc00707f +#define MATCH_VFSGNJN_VF 0x24005057 +#define MASK_VFSGNJN_VF 0xfc00707f +#define MATCH_VFSGNJN_VV 0x24001057 +#define MASK_VFSGNJN_VV 0xfc00707f +#define 
MATCH_VFSGNJX_VF 0x28005057 +#define MASK_VFSGNJX_VF 0xfc00707f +#define MATCH_VFSGNJX_VV 0x28001057 +#define MASK_VFSGNJX_VV 0xfc00707f +#define MATCH_VFSLIDE1DOWN_VF 0x3c005057 +#define MASK_VFSLIDE1DOWN_VF 0xfc00707f +#define MATCH_VFSLIDE1UP_VF 0x38005057 +#define MASK_VFSLIDE1UP_VF 0xfc00707f +#define MATCH_VFSQRT_V 0x4c001057 +#define MASK_VFSQRT_V 0xfc0ff07f +#define MATCH_VFSUB_VF 0x8005057 +#define MASK_VFSUB_VF 0xfc00707f +#define MATCH_VFSUB_VV 0x8001057 +#define MASK_VFSUB_VV 0xfc00707f +#define MATCH_VFWADD_VF 0xc0005057 +#define MASK_VFWADD_VF 0xfc00707f +#define MATCH_VFWADD_VV 0xc0001057 +#define MASK_VFWADD_VV 0xfc00707f +#define MATCH_VFWADD_WF 0xd0005057 +#define MASK_VFWADD_WF 0xfc00707f +#define MATCH_VFWADD_WV 0xd0001057 +#define MASK_VFWADD_WV 0xfc00707f +#define MATCH_VFWCVT_F_F_V 0x48061057 +#define MASK_VFWCVT_F_F_V 0xfc0ff07f +#define MATCH_VFWCVT_F_X_V 0x48059057 +#define MASK_VFWCVT_F_X_V 0xfc0ff07f +#define MATCH_VFWCVT_F_XU_V 0x48051057 +#define MASK_VFWCVT_F_XU_V 0xfc0ff07f +#define MATCH_VFWCVT_RTZ_X_F_V 0x48079057 +#define MASK_VFWCVT_RTZ_X_F_V 0xfc0ff07f +#define MATCH_VFWCVT_RTZ_XU_F_V 0x48071057 +#define MASK_VFWCVT_RTZ_XU_F_V 0xfc0ff07f +#define MATCH_VFWCVT_X_F_V 0x48049057 +#define MASK_VFWCVT_X_F_V 0xfc0ff07f +#define MATCH_VFWCVT_XU_F_V 0x48041057 +#define MASK_VFWCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFWMACC_VF 0xf0005057 +#define MASK_VFWMACC_VF 0xfc00707f +#define MATCH_VFWMACC_VV 0xf0001057 +#define MASK_VFWMACC_VV 0xfc00707f +#define MATCH_VFWMSAC_VF 0xf8005057 +#define MASK_VFWMSAC_VF 0xfc00707f +#define MATCH_VFWMSAC_VV 0xf8001057 +#define MASK_VFWMSAC_VV 0xfc00707f +#define MATCH_VFWMUL_VF 0xe0005057 +#define MASK_VFWMUL_VF 0xfc00707f +#define MATCH_VFWMUL_VV 0xe0001057 +#define MASK_VFWMUL_VV 0xfc00707f +#define MATCH_VFWNMACC_VF 0xf4005057 +#define MASK_VFWNMACC_VF 0xfc00707f +#define MATCH_VFWNMACC_VV 0xf4001057 +#define MASK_VFWNMACC_VV 0xfc00707f +#define MATCH_VFWNMSAC_VF 0xfc005057 +#define MASK_VFWNMSAC_VF 0xfc00707f +#define MATCH_VFWNMSAC_VV 0xfc001057 +#define MASK_VFWNMSAC_VV 0xfc00707f +#define MATCH_VFWREDOSUM_VS 0xcc001057 +#define MASK_VFWREDOSUM_VS 0xfc00707f +#define MATCH_VFWREDUSUM_VS 0xc4001057 +#define MASK_VFWREDUSUM_VS 0xfc00707f +#define MATCH_VFWSUB_VF 0xc8005057 +#define MASK_VFWSUB_VF 0xfc00707f +#define MATCH_VFWSUB_VV 0xc8001057 +#define MASK_VFWSUB_VV 0xfc00707f +#define MATCH_VFWSUB_WF 0xd8005057 +#define MASK_VFWSUB_WF 0xfc00707f +#define MATCH_VFWSUB_WV 0xd8001057 +#define MASK_VFWSUB_WV 0xfc00707f +#define MATCH_VID_V 0x5008a057 +#define MASK_VID_V 0xfdfff07f +#define MATCH_VIOTA_M 0x50082057 +#define MASK_VIOTA_M 0xfc0ff07f +#define MATCH_VL1RE16_V 0x2805007 +#define MASK_VL1RE16_V 0xfff0707f +#define MATCH_VL1RE32_V 0x2806007 +#define MASK_VL1RE32_V 0xfff0707f +#define MATCH_VL1RE64_V 0x2807007 +#define MASK_VL1RE64_V 0xfff0707f +#define MATCH_VL1RE8_V 0x2800007 +#define MASK_VL1RE8_V 0xfff0707f +#define MATCH_VL2RE16_V 0x22805007 +#define MASK_VL2RE16_V 0xfff0707f +#define MATCH_VL2RE32_V 0x22806007 +#define MASK_VL2RE32_V 0xfff0707f +#define MATCH_VL2RE64_V 0x22807007 +#define MASK_VL2RE64_V 0xfff0707f +#define MATCH_VL2RE8_V 0x22800007 +#define MASK_VL2RE8_V 0xfff0707f +#define MATCH_VL4RE16_V 0x62805007 +#define MASK_VL4RE16_V 0xfff0707f +#define MATCH_VL4RE32_V 0x62806007 +#define MASK_VL4RE32_V 0xfff0707f +#define MATCH_VL4RE64_V 0x62807007 +#define MASK_VL4RE64_V 0xfff0707f +#define MATCH_VL4RE8_V 0x62800007 +#define MASK_VL4RE8_V 0xfff0707f +#define MATCH_VL8RE16_V 0xe2805007 +#define 
MASK_VL8RE16_V 0xfff0707f +#define MATCH_VL8RE32_V 0xe2806007 +#define MASK_VL8RE32_V 0xfff0707f +#define MATCH_VL8RE64_V 0xe2807007 +#define MASK_VL8RE64_V 0xfff0707f +#define MATCH_VL8RE8_V 0xe2800007 +#define MASK_VL8RE8_V 0xfff0707f +#define MATCH_VLE1024_V 0x10007007 +#define MASK_VLE1024_V 0x1df0707f +#define MATCH_VLE1024FF_V 0x11007007 +#define MASK_VLE1024FF_V 0x1df0707f +#define MATCH_VLE128_V 0x10000007 +#define MASK_VLE128_V 0x1df0707f +#define MATCH_VLE128FF_V 0x11000007 +#define MASK_VLE128FF_V 0x1df0707f +#define MATCH_VLE16_V 0x5007 +#define MASK_VLE16_V 0x1df0707f +#define MATCH_VLE16FF_V 0x1005007 +#define MASK_VLE16FF_V 0x1df0707f +#define MATCH_VLE256_V 0x10005007 +#define MASK_VLE256_V 0x1df0707f +#define MATCH_VLE256FF_V 0x11005007 +#define MASK_VLE256FF_V 0x1df0707f +#define MATCH_VLE32_V 0x6007 +#define MASK_VLE32_V 0x1df0707f +#define MATCH_VLE32FF_V 0x1006007 +#define MASK_VLE32FF_V 0x1df0707f +#define MATCH_VLE512_V 0x10006007 +#define MASK_VLE512_V 0x1df0707f +#define MATCH_VLE512FF_V 0x11006007 +#define MASK_VLE512FF_V 0x1df0707f +#define MATCH_VLE64_V 0x7007 +#define MASK_VLE64_V 0x1df0707f +#define MATCH_VLE64FF_V 0x1007007 +#define MASK_VLE64FF_V 0x1df0707f +#define MATCH_VLE8_V 0x7 +#define MASK_VLE8_V 0x1df0707f +#define MATCH_VLE8FF_V 0x1000007 +#define MASK_VLE8FF_V 0x1df0707f +#define MATCH_VLM_V 0x2b00007 +#define MASK_VLM_V 0xfff0707f +#define MATCH_VLOXEI1024_V 0x1c007007 +#define MASK_VLOXEI1024_V 0x1c00707f +#define MATCH_VLOXEI128_V 0x1c000007 +#define MASK_VLOXEI128_V 0x1c00707f +#define MATCH_VLOXEI16_V 0xc005007 +#define MASK_VLOXEI16_V 0x1c00707f +#define MATCH_VLOXEI256_V 0x1c005007 +#define MASK_VLOXEI256_V 0x1c00707f +#define MATCH_VLOXEI32_V 0xc006007 +#define MASK_VLOXEI32_V 0x1c00707f +#define MATCH_VLOXEI512_V 0x1c006007 +#define MASK_VLOXEI512_V 0x1c00707f +#define MATCH_VLOXEI64_V 0xc007007 +#define MASK_VLOXEI64_V 0x1c00707f +#define MATCH_VLOXEI8_V 0xc000007 +#define MASK_VLOXEI8_V 0x1c00707f +#define MATCH_VLSE1024_V 0x18007007 +#define MASK_VLSE1024_V 0x1c00707f +#define MATCH_VLSE128_V 0x18000007 +#define MASK_VLSE128_V 0x1c00707f +#define MATCH_VLSE16_V 0x8005007 +#define MASK_VLSE16_V 0x1c00707f +#define MATCH_VLSE256_V 0x18005007 +#define MASK_VLSE256_V 0x1c00707f +#define MATCH_VLSE32_V 0x8006007 +#define MASK_VLSE32_V 0x1c00707f +#define MATCH_VLSE512_V 0x18006007 +#define MASK_VLSE512_V 0x1c00707f +#define MATCH_VLSE64_V 0x8007007 +#define MASK_VLSE64_V 0x1c00707f +#define MATCH_VLSE8_V 0x8000007 +#define MASK_VLSE8_V 0x1c00707f +#define MATCH_VLUXEI1024_V 0x14007007 +#define MASK_VLUXEI1024_V 0x1c00707f +#define MATCH_VLUXEI128_V 0x14000007 +#define MASK_VLUXEI128_V 0x1c00707f +#define MATCH_VLUXEI16_V 0x4005007 +#define MASK_VLUXEI16_V 0x1c00707f +#define MATCH_VLUXEI256_V 0x14005007 +#define MASK_VLUXEI256_V 0x1c00707f +#define MATCH_VLUXEI32_V 0x4006007 +#define MASK_VLUXEI32_V 0x1c00707f +#define MATCH_VLUXEI512_V 0x14006007 +#define MASK_VLUXEI512_V 0x1c00707f +#define MATCH_VLUXEI64_V 0x4007007 +#define MASK_VLUXEI64_V 0x1c00707f +#define MATCH_VLUXEI8_V 0x4000007 +#define MASK_VLUXEI8_V 0x1c00707f +#define MATCH_VMACC_VV 0xb4002057 +#define MASK_VMACC_VV 0xfc00707f +#define MATCH_VMACC_VX 0xb4006057 +#define MASK_VMACC_VX 0xfc00707f +#define MATCH_VMADC_VI 0x46003057 +#define MASK_VMADC_VI 0xfe00707f +#define MATCH_VMADC_VIM 0x44003057 +#define MASK_VMADC_VIM 0xfe00707f +#define MATCH_VMADC_VV 0x46000057 +#define MASK_VMADC_VV 0xfe00707f +#define MATCH_VMADC_VVM 0x44000057 +#define MASK_VMADC_VVM 0xfe00707f +#define 
MATCH_VMADC_VX 0x46004057 +#define MASK_VMADC_VX 0xfe00707f +#define MATCH_VMADC_VXM 0x44004057 +#define MASK_VMADC_VXM 0xfe00707f +#define MATCH_VMADD_VV 0xa4002057 +#define MASK_VMADD_VV 0xfc00707f +#define MATCH_VMADD_VX 0xa4006057 +#define MASK_VMADD_VX 0xfc00707f +#define MATCH_VMAND_MM 0x64002057 +#define MASK_VMAND_MM 0xfc00707f +#define MATCH_VMANDN_MM 0x60002057 +#define MASK_VMANDN_MM 0xfc00707f +#define MATCH_VMAX_VV 0x1c000057 +#define MASK_VMAX_VV 0xfc00707f +#define MATCH_VMAX_VX 0x1c004057 +#define MASK_VMAX_VX 0xfc00707f +#define MATCH_VMAXU_VV 0x18000057 +#define MASK_VMAXU_VV 0xfc00707f +#define MATCH_VMAXU_VX 0x18004057 +#define MASK_VMAXU_VX 0xfc00707f +#define MATCH_VMERGE_VIM 0x5c003057 +#define MASK_VMERGE_VIM 0xfe00707f +#define MATCH_VMERGE_VVM 0x5c000057 +#define MASK_VMERGE_VVM 0xfe00707f +#define MATCH_VMERGE_VXM 0x5c004057 +#define MASK_VMERGE_VXM 0xfe00707f +#define MATCH_VMFEQ_VF 0x60005057 +#define MASK_VMFEQ_VF 0xfc00707f +#define MATCH_VMFEQ_VV 0x60001057 +#define MASK_VMFEQ_VV 0xfc00707f +#define MATCH_VMFGE_VF 0x7c005057 +#define MASK_VMFGE_VF 0xfc00707f +#define MATCH_VMFGT_VF 0x74005057 +#define MASK_VMFGT_VF 0xfc00707f +#define MATCH_VMFLE_VF 0x64005057 +#define MASK_VMFLE_VF 0xfc00707f +#define MATCH_VMFLE_VV 0x64001057 +#define MASK_VMFLE_VV 0xfc00707f +#define MATCH_VMFLT_VF 0x6c005057 +#define MASK_VMFLT_VF 0xfc00707f +#define MATCH_VMFLT_VV 0x6c001057 +#define MASK_VMFLT_VV 0xfc00707f +#define MATCH_VMFNE_VF 0x70005057 +#define MASK_VMFNE_VF 0xfc00707f +#define MATCH_VMFNE_VV 0x70001057 +#define MASK_VMFNE_VV 0xfc00707f +#define MATCH_VMIN_VV 0x14000057 +#define MASK_VMIN_VV 0xfc00707f +#define MATCH_VMIN_VX 0x14004057 +#define MASK_VMIN_VX 0xfc00707f +#define MATCH_VMINU_VV 0x10000057 +#define MASK_VMINU_VV 0xfc00707f +#define MATCH_VMINU_VX 0x10004057 +#define MASK_VMINU_VX 0xfc00707f +#define MATCH_VMNAND_MM 0x74002057 +#define MASK_VMNAND_MM 0xfc00707f +#define MATCH_VMNOR_MM 0x78002057 +#define MASK_VMNOR_MM 0xfc00707f +#define MATCH_VMOR_MM 0x68002057 +#define MASK_VMOR_MM 0xfc00707f +#define MATCH_VMORN_MM 0x70002057 +#define MASK_VMORN_MM 0xfc00707f +#define MATCH_VMSBC_VV 0x4e000057 +#define MASK_VMSBC_VV 0xfe00707f +#define MATCH_VMSBC_VVM 0x4c000057 +#define MASK_VMSBC_VVM 0xfe00707f +#define MATCH_VMSBC_VX 0x4e004057 +#define MASK_VMSBC_VX 0xfe00707f +#define MATCH_VMSBC_VXM 0x4c004057 +#define MASK_VMSBC_VXM 0xfe00707f +#define MATCH_VMSBF_M 0x5000a057 +#define MASK_VMSBF_M 0xfc0ff07f +#define MATCH_VMSEQ_VI 0x60003057 +#define MASK_VMSEQ_VI 0xfc00707f +#define MATCH_VMSEQ_VV 0x60000057 +#define MASK_VMSEQ_VV 0xfc00707f +#define MATCH_VMSEQ_VX 0x60004057 +#define MASK_VMSEQ_VX 0xfc00707f +#define MATCH_VMSGT_VI 0x7c003057 +#define MASK_VMSGT_VI 0xfc00707f +#define MATCH_VMSGT_VX 0x7c004057 +#define MASK_VMSGT_VX 0xfc00707f +#define MATCH_VMSGTU_VI 0x78003057 +#define MASK_VMSGTU_VI 0xfc00707f +#define MATCH_VMSGTU_VX 0x78004057 +#define MASK_VMSGTU_VX 0xfc00707f +#define MATCH_VMSIF_M 0x5001a057 +#define MASK_VMSIF_M 0xfc0ff07f +#define MATCH_VMSLE_VI 0x74003057 +#define MASK_VMSLE_VI 0xfc00707f +#define MATCH_VMSLE_VV 0x74000057 +#define MASK_VMSLE_VV 0xfc00707f +#define MATCH_VMSLE_VX 0x74004057 +#define MASK_VMSLE_VX 0xfc00707f +#define MATCH_VMSLEU_VI 0x70003057 +#define MASK_VMSLEU_VI 0xfc00707f +#define MATCH_VMSLEU_VV 0x70000057 +#define MASK_VMSLEU_VV 0xfc00707f +#define MATCH_VMSLEU_VX 0x70004057 +#define MASK_VMSLEU_VX 0xfc00707f +#define MATCH_VMSLT_VV 0x6c000057 +#define MASK_VMSLT_VV 0xfc00707f +#define MATCH_VMSLT_VX 
0x6c004057 +#define MASK_VMSLT_VX 0xfc00707f +#define MATCH_VMSLTU_VV 0x68000057 +#define MASK_VMSLTU_VV 0xfc00707f +#define MATCH_VMSLTU_VX 0x68004057 +#define MASK_VMSLTU_VX 0xfc00707f +#define MATCH_VMSNE_VI 0x64003057 +#define MASK_VMSNE_VI 0xfc00707f +#define MATCH_VMSNE_VV 0x64000057 +#define MASK_VMSNE_VV 0xfc00707f +#define MATCH_VMSNE_VX 0x64004057 +#define MASK_VMSNE_VX 0xfc00707f +#define MATCH_VMSOF_M 0x50012057 +#define MASK_VMSOF_M 0xfc0ff07f +#define MATCH_VMUL_VV 0x94002057 +#define MASK_VMUL_VV 0xfc00707f +#define MATCH_VMUL_VX 0x94006057 +#define MASK_VMUL_VX 0xfc00707f +#define MATCH_VMULH_VV 0x9c002057 +#define MASK_VMULH_VV 0xfc00707f +#define MATCH_VMULH_VX 0x9c006057 +#define MASK_VMULH_VX 0xfc00707f +#define MATCH_VMULHSU_VV 0x98002057 +#define MASK_VMULHSU_VV 0xfc00707f +#define MATCH_VMULHSU_VX 0x98006057 +#define MASK_VMULHSU_VX 0xfc00707f +#define MATCH_VMULHU_VV 0x90002057 +#define MASK_VMULHU_VV 0xfc00707f +#define MATCH_VMULHU_VX 0x90006057 +#define MASK_VMULHU_VX 0xfc00707f +#define MATCH_VMV1R_V 0x9e003057 +#define MASK_VMV1R_V 0xfe0ff07f +#define MATCH_VMV2R_V 0x9e00b057 +#define MASK_VMV2R_V 0xfe0ff07f +#define MATCH_VMV4R_V 0x9e01b057 +#define MASK_VMV4R_V 0xfe0ff07f +#define MATCH_VMV8R_V 0x9e03b057 +#define MASK_VMV8R_V 0xfe0ff07f +#define MATCH_VMV_S_X 0x42006057 +#define MASK_VMV_S_X 0xfff0707f +#define MATCH_VMV_V_I 0x5e003057 +#define MASK_VMV_V_I 0xfff0707f +#define MATCH_VMV_V_V 0x5e000057 +#define MASK_VMV_V_V 0xfff0707f +#define MATCH_VMV_V_X 0x5e004057 +#define MASK_VMV_V_X 0xfff0707f +#define MATCH_VMV_X_S 0x42002057 +#define MASK_VMV_X_S 0xfe0ff07f +#define MATCH_VMXNOR_MM 0x7c002057 +#define MASK_VMXNOR_MM 0xfc00707f +#define MATCH_VMXOR_MM 0x6c002057 +#define MASK_VMXOR_MM 0xfc00707f +#define MATCH_VNCLIP_WI 0xbc003057 +#define MASK_VNCLIP_WI 0xfc00707f +#define MATCH_VNCLIP_WV 0xbc000057 +#define MASK_VNCLIP_WV 0xfc00707f +#define MATCH_VNCLIP_WX 0xbc004057 +#define MASK_VNCLIP_WX 0xfc00707f +#define MATCH_VNCLIPU_WI 0xb8003057 +#define MASK_VNCLIPU_WI 0xfc00707f +#define MATCH_VNCLIPU_WV 0xb8000057 +#define MASK_VNCLIPU_WV 0xfc00707f +#define MATCH_VNCLIPU_WX 0xb8004057 +#define MASK_VNCLIPU_WX 0xfc00707f +#define MATCH_VNMSAC_VV 0xbc002057 +#define MASK_VNMSAC_VV 0xfc00707f +#define MATCH_VNMSAC_VX 0xbc006057 +#define MASK_VNMSAC_VX 0xfc00707f +#define MATCH_VNMSUB_VV 0xac002057 +#define MASK_VNMSUB_VV 0xfc00707f +#define MATCH_VNMSUB_VX 0xac006057 +#define MASK_VNMSUB_VX 0xfc00707f +#define MATCH_VNSRA_WI 0xb4003057 +#define MASK_VNSRA_WI 0xfc00707f +#define MATCH_VNSRA_WV 0xb4000057 +#define MASK_VNSRA_WV 0xfc00707f +#define MATCH_VNSRA_WX 0xb4004057 +#define MASK_VNSRA_WX 0xfc00707f +#define MATCH_VNSRL_WI 0xb0003057 +#define MASK_VNSRL_WI 0xfc00707f +#define MATCH_VNSRL_WV 0xb0000057 +#define MASK_VNSRL_WV 0xfc00707f +#define MATCH_VNSRL_WX 0xb0004057 +#define MASK_VNSRL_WX 0xfc00707f +#define MATCH_VOR_VI 0x28003057 +#define MASK_VOR_VI 0xfc00707f +#define MATCH_VOR_VV 0x28000057 +#define MASK_VOR_VV 0xfc00707f +#define MATCH_VOR_VX 0x28004057 +#define MASK_VOR_VX 0xfc00707f +#define MATCH_VREDAND_VS 0x4002057 +#define MASK_VREDAND_VS 0xfc00707f +#define MATCH_VREDMAX_VS 0x1c002057 +#define MASK_VREDMAX_VS 0xfc00707f +#define MATCH_VREDMAXU_VS 0x18002057 +#define MASK_VREDMAXU_VS 0xfc00707f +#define MATCH_VREDMIN_VS 0x14002057 +#define MASK_VREDMIN_VS 0xfc00707f +#define MATCH_VREDMINU_VS 0x10002057 +#define MASK_VREDMINU_VS 0xfc00707f +#define MATCH_VREDOR_VS 0x8002057 +#define MASK_VREDOR_VS 0xfc00707f +#define MATCH_VREDSUM_VS 
0x2057 +#define MASK_VREDSUM_VS 0xfc00707f +#define MATCH_VREDXOR_VS 0xc002057 +#define MASK_VREDXOR_VS 0xfc00707f +#define MATCH_VREM_VV 0x8c002057 +#define MASK_VREM_VV 0xfc00707f +#define MATCH_VREM_VX 0x8c006057 +#define MASK_VREM_VX 0xfc00707f +#define MATCH_VREMU_VV 0x88002057 +#define MASK_VREMU_VV 0xfc00707f +#define MATCH_VREMU_VX 0x88006057 +#define MASK_VREMU_VX 0xfc00707f +#define MATCH_VRGATHER_VI 0x30003057 +#define MASK_VRGATHER_VI 0xfc00707f +#define MATCH_VRGATHER_VV 0x30000057 +#define MASK_VRGATHER_VV 0xfc00707f +#define MATCH_VRGATHER_VX 0x30004057 +#define MASK_VRGATHER_VX 0xfc00707f +#define MATCH_VRGATHEREI16_VV 0x38000057 +#define MASK_VRGATHEREI16_VV 0xfc00707f +#define MATCH_VRSUB_VI 0xc003057 +#define MASK_VRSUB_VI 0xfc00707f +#define MATCH_VRSUB_VX 0xc004057 +#define MASK_VRSUB_VX 0xfc00707f +#define MATCH_VS1R_V 0x2800027 +#define MASK_VS1R_V 0xfff0707f +#define MATCH_VS2R_V 0x22800027 +#define MASK_VS2R_V 0xfff0707f +#define MATCH_VS4R_V 0x62800027 +#define MASK_VS4R_V 0xfff0707f +#define MATCH_VS8R_V 0xe2800027 +#define MASK_VS8R_V 0xfff0707f +#define MATCH_VSADD_VI 0x84003057 +#define MASK_VSADD_VI 0xfc00707f +#define MATCH_VSADD_VV 0x84000057 +#define MASK_VSADD_VV 0xfc00707f +#define MATCH_VSADD_VX 0x84004057 +#define MASK_VSADD_VX 0xfc00707f +#define MATCH_VSADDU_VI 0x80003057 +#define MASK_VSADDU_VI 0xfc00707f +#define MATCH_VSADDU_VV 0x80000057 +#define MASK_VSADDU_VV 0xfc00707f +#define MATCH_VSADDU_VX 0x80004057 +#define MASK_VSADDU_VX 0xfc00707f +#define MATCH_VSBC_VVM 0x48000057 +#define MASK_VSBC_VVM 0xfe00707f +#define MATCH_VSBC_VXM 0x48004057 +#define MASK_VSBC_VXM 0xfe00707f +#define MATCH_VSE1024_V 0x10007027 +#define MASK_VSE1024_V 0x1df0707f +#define MATCH_VSE128_V 0x10000027 +#define MASK_VSE128_V 0x1df0707f +#define MATCH_VSE16_V 0x5027 +#define MASK_VSE16_V 0x1df0707f +#define MATCH_VSE256_V 0x10005027 +#define MASK_VSE256_V 0x1df0707f +#define MATCH_VSE32_V 0x6027 +#define MASK_VSE32_V 0x1df0707f +#define MATCH_VSE512_V 0x10006027 +#define MASK_VSE512_V 0x1df0707f +#define MATCH_VSE64_V 0x7027 +#define MASK_VSE64_V 0x1df0707f +#define MATCH_VSE8_V 0x27 +#define MASK_VSE8_V 0x1df0707f +#define MATCH_VSETIVLI 0xc0007057 +#define MASK_VSETIVLI 0xc000707f +#define MATCH_VSETVL 0x80007057 +#define MASK_VSETVL 0xfe00707f +#define MATCH_VSETVLI 0x7057 +#define MASK_VSETVLI 0x8000707f +#define MATCH_VSEXT_VF2 0x4803a057 +#define MASK_VSEXT_VF2 0xfc0ff07f +#define MATCH_VSEXT_VF4 0x4802a057 +#define MASK_VSEXT_VF4 0xfc0ff07f +#define MATCH_VSEXT_VF8 0x4801a057 +#define MASK_VSEXT_VF8 0xfc0ff07f +#define MATCH_VSLIDE1DOWN_VX 0x3c006057 +#define MASK_VSLIDE1DOWN_VX 0xfc00707f +#define MATCH_VSLIDE1UP_VX 0x38006057 +#define MASK_VSLIDE1UP_VX 0xfc00707f +#define MATCH_VSLIDEDOWN_VI 0x3c003057 +#define MASK_VSLIDEDOWN_VI 0xfc00707f +#define MATCH_VSLIDEDOWN_VX 0x3c004057 +#define MASK_VSLIDEDOWN_VX 0xfc00707f +#define MATCH_VSLIDEUP_VI 0x38003057 +#define MASK_VSLIDEUP_VI 0xfc00707f +#define MATCH_VSLIDEUP_VX 0x38004057 +#define MASK_VSLIDEUP_VX 0xfc00707f +#define MATCH_VSLL_VI 0x94003057 +#define MASK_VSLL_VI 0xfc00707f +#define MATCH_VSLL_VV 0x94000057 +#define MASK_VSLL_VV 0xfc00707f +#define MATCH_VSLL_VX 0x94004057 +#define MASK_VSLL_VX 0xfc00707f +#define MATCH_VSM_V 0x2b00027 +#define MASK_VSM_V 0xfff0707f +#define MATCH_VSMUL_VV 0x9c000057 +#define MASK_VSMUL_VV 0xfc00707f +#define MATCH_VSMUL_VX 0x9c004057 +#define MASK_VSMUL_VX 0xfc00707f +#define MATCH_VSOXEI1024_V 0x1c007027 +#define MASK_VSOXEI1024_V 0x1c00707f +#define MATCH_VSOXEI128_V 
0x1c000027 +#define MASK_VSOXEI128_V 0x1c00707f +#define MATCH_VSOXEI16_V 0xc005027 +#define MASK_VSOXEI16_V 0x1c00707f +#define MATCH_VSOXEI256_V 0x1c005027 +#define MASK_VSOXEI256_V 0x1c00707f +#define MATCH_VSOXEI32_V 0xc006027 +#define MASK_VSOXEI32_V 0x1c00707f +#define MATCH_VSOXEI512_V 0x1c006027 +#define MASK_VSOXEI512_V 0x1c00707f +#define MATCH_VSOXEI64_V 0xc007027 +#define MASK_VSOXEI64_V 0x1c00707f +#define MATCH_VSOXEI8_V 0xc000027 +#define MASK_VSOXEI8_V 0x1c00707f +#define MATCH_VSRA_VI 0xa4003057 +#define MASK_VSRA_VI 0xfc00707f +#define MATCH_VSRA_VV 0xa4000057 +#define MASK_VSRA_VV 0xfc00707f +#define MATCH_VSRA_VX 0xa4004057 +#define MASK_VSRA_VX 0xfc00707f +#define MATCH_VSRL_VI 0xa0003057 +#define MASK_VSRL_VI 0xfc00707f +#define MATCH_VSRL_VV 0xa0000057 +#define MASK_VSRL_VV 0xfc00707f +#define MATCH_VSRL_VX 0xa0004057 +#define MASK_VSRL_VX 0xfc00707f +#define MATCH_VSSE1024_V 0x18007027 +#define MASK_VSSE1024_V 0x1c00707f +#define MATCH_VSSE128_V 0x18000027 +#define MASK_VSSE128_V 0x1c00707f +#define MATCH_VSSE16_V 0x8005027 +#define MASK_VSSE16_V 0x1c00707f +#define MATCH_VSSE256_V 0x18005027 +#define MASK_VSSE256_V 0x1c00707f +#define MATCH_VSSE32_V 0x8006027 +#define MASK_VSSE32_V 0x1c00707f +#define MATCH_VSSE512_V 0x18006027 +#define MASK_VSSE512_V 0x1c00707f +#define MATCH_VSSE64_V 0x8007027 +#define MASK_VSSE64_V 0x1c00707f +#define MATCH_VSSE8_V 0x8000027 +#define MASK_VSSE8_V 0x1c00707f +#define MATCH_VSSRA_VI 0xac003057 +#define MASK_VSSRA_VI 0xfc00707f +#define MATCH_VSSRA_VV 0xac000057 +#define MASK_VSSRA_VV 0xfc00707f +#define MATCH_VSSRA_VX 0xac004057 +#define MASK_VSSRA_VX 0xfc00707f +#define MATCH_VSSRL_VI 0xa8003057 +#define MASK_VSSRL_VI 0xfc00707f +#define MATCH_VSSRL_VV 0xa8000057 +#define MASK_VSSRL_VV 0xfc00707f +#define MATCH_VSSRL_VX 0xa8004057 +#define MASK_VSSRL_VX 0xfc00707f +#define MATCH_VSSUB_VV 0x8c000057 +#define MASK_VSSUB_VV 0xfc00707f +#define MATCH_VSSUB_VX 0x8c004057 +#define MASK_VSSUB_VX 0xfc00707f +#define MATCH_VSSUBU_VV 0x88000057 +#define MASK_VSSUBU_VV 0xfc00707f +#define MATCH_VSSUBU_VX 0x88004057 +#define MASK_VSSUBU_VX 0xfc00707f +#define MATCH_VSUB_VV 0x8000057 +#define MASK_VSUB_VV 0xfc00707f +#define MATCH_VSUB_VX 0x8004057 +#define MASK_VSUB_VX 0xfc00707f +#define MATCH_VSUXEI1024_V 0x14007027 +#define MASK_VSUXEI1024_V 0x1c00707f +#define MATCH_VSUXEI128_V 0x14000027 +#define MASK_VSUXEI128_V 0x1c00707f +#define MATCH_VSUXEI16_V 0x4005027 +#define MASK_VSUXEI16_V 0x1c00707f +#define MATCH_VSUXEI256_V 0x14005027 +#define MASK_VSUXEI256_V 0x1c00707f +#define MATCH_VSUXEI32_V 0x4006027 +#define MASK_VSUXEI32_V 0x1c00707f +#define MATCH_VSUXEI512_V 0x14006027 +#define MASK_VSUXEI512_V 0x1c00707f +#define MATCH_VSUXEI64_V 0x4007027 +#define MASK_VSUXEI64_V 0x1c00707f +#define MATCH_VSUXEI8_V 0x4000027 +#define MASK_VSUXEI8_V 0x1c00707f +#define MATCH_VWADD_VV 0xc4002057 +#define MASK_VWADD_VV 0xfc00707f +#define MATCH_VWADD_VX 0xc4006057 +#define MASK_VWADD_VX 0xfc00707f +#define MATCH_VWADD_WV 0xd4002057 +#define MASK_VWADD_WV 0xfc00707f +#define MATCH_VWADD_WX 0xd4006057 +#define MASK_VWADD_WX 0xfc00707f +#define MATCH_VWADDU_VV 0xc0002057 +#define MASK_VWADDU_VV 0xfc00707f +#define MATCH_VWADDU_VX 0xc0006057 +#define MASK_VWADDU_VX 0xfc00707f +#define MATCH_VWADDU_WV 0xd0002057 +#define MASK_VWADDU_WV 0xfc00707f +#define MATCH_VWADDU_WX 0xd0006057 +#define MASK_VWADDU_WX 0xfc00707f +#define MATCH_VWMACC_VV 0xf4002057 +#define MASK_VWMACC_VV 0xfc00707f +#define MATCH_VWMACC_VX 0xf4006057 +#define MASK_VWMACC_VX 
0xfc00707f +#define MATCH_VWMACCSU_VV 0xfc002057 +#define MASK_VWMACCSU_VV 0xfc00707f +#define MATCH_VWMACCSU_VX 0xfc006057 +#define MASK_VWMACCSU_VX 0xfc00707f +#define MATCH_VWMACCU_VV 0xf0002057 +#define MASK_VWMACCU_VV 0xfc00707f +#define MATCH_VWMACCU_VX 0xf0006057 +#define MASK_VWMACCU_VX 0xfc00707f +#define MATCH_VWMACCUS_VX 0xf8006057 +#define MASK_VWMACCUS_VX 0xfc00707f +#define MATCH_VWMUL_VV 0xec002057 +#define MASK_VWMUL_VV 0xfc00707f +#define MATCH_VWMUL_VX 0xec006057 +#define MASK_VWMUL_VX 0xfc00707f +#define MATCH_VWMULSU_VV 0xe8002057 +#define MASK_VWMULSU_VV 0xfc00707f +#define MATCH_VWMULSU_VX 0xe8006057 +#define MASK_VWMULSU_VX 0xfc00707f +#define MATCH_VWMULU_VV 0xe0002057 +#define MASK_VWMULU_VV 0xfc00707f +#define MATCH_VWMULU_VX 0xe0006057 +#define MASK_VWMULU_VX 0xfc00707f +#define MATCH_VWREDSUM_VS 0xc4000057 +#define MASK_VWREDSUM_VS 0xfc00707f +#define MATCH_VWREDSUMU_VS 0xc0000057 +#define MASK_VWREDSUMU_VS 0xfc00707f +#define MATCH_VWSUB_VV 0xcc002057 +#define MASK_VWSUB_VV 0xfc00707f +#define MATCH_VWSUB_VX 0xcc006057 +#define MASK_VWSUB_VX 0xfc00707f +#define MATCH_VWSUB_WV 0xdc002057 +#define MASK_VWSUB_WV 0xfc00707f +#define MATCH_VWSUB_WX 0xdc006057 +#define MASK_VWSUB_WX 0xfc00707f +#define MATCH_VWSUBU_VV 0xc8002057 +#define MASK_VWSUBU_VV 0xfc00707f +#define MATCH_VWSUBU_VX 0xc8006057 +#define MASK_VWSUBU_VX 0xfc00707f +#define MATCH_VWSUBU_WV 0xd8002057 +#define MASK_VWSUBU_WV 0xfc00707f +#define MATCH_VWSUBU_WX 0xd8006057 +#define MASK_VWSUBU_WX 0xfc00707f +#define MATCH_VXOR_VI 0x2c003057 +#define MASK_VXOR_VI 0xfc00707f +#define MATCH_VXOR_VV 0x2c000057 +#define MASK_VXOR_VV 0xfc00707f +#define MATCH_VXOR_VX 0x2c004057 +#define MASK_VXOR_VX 0xfc00707f +#define MATCH_VZEXT_VF2 0x48032057 +#define MASK_VZEXT_VF2 0xfc0ff07f +#define MATCH_VZEXT_VF4 0x48022057 +#define MASK_VZEXT_VF4 0xfc0ff07f +#define MATCH_VZEXT_VF8 0x48012057 +#define MASK_VZEXT_VF8 0xfc0ff07f +#define MATCH_WFI 0x10500073 +#define MASK_WFI 0xffffffff +#define MATCH_WRS_NTO 0xd00073 +#define MASK_WRS_NTO 0xffffffff +#define MATCH_WRS_STO 0x1d00073 +#define MASK_WRS_STO 0xffffffff +#define MATCH_XNOR 0x40004033 +#define MASK_XNOR 0xfe00707f +#define MATCH_XOR 0x4033 +#define MASK_XOR 0xfe00707f +#define MATCH_XORI 0x4013 +#define MASK_XORI 0x707f +#define MATCH_XPERM16 0x28006033 +#define MASK_XPERM16 0xfe00707f +#define MATCH_XPERM32 0x28000033 +#define MASK_XPERM32 0xfe00707f +#define MATCH_XPERM4 0x28002033 +#define MASK_XPERM4 0xfe00707f +#define MATCH_XPERM8 0x28004033 +#define MASK_XPERM8 0xfe00707f +#define MATCH_ZUNPKD810 0xacc00077 +#define MASK_ZUNPKD810 0xfff0707f +#define MATCH_ZUNPKD820 0xacd00077 +#define MASK_ZUNPKD820 0xfff0707f +#define MATCH_ZUNPKD830 0xace00077 +#define MASK_ZUNPKD830 0xfff0707f +#define MATCH_ZUNPKD831 0xacf00077 +#define MASK_ZUNPKD831 0xfff0707f +#define MATCH_ZUNPKD832 0xad700077 +#define MASK_ZUNPKD832 0xfff0707f + +#define CSR_FFLAGS 0x1 +#define CSR_FRM 0x2 +#define CSR_FCSR 0x3 +#define CSR_VSTART 0x8 +#define CSR_VXSAT 0x9 +#define CSR_VXRM 0xa +#define CSR_VCSR 0xf +#define CSR_SEED 0x15 +#define CSR_JVT 0x17 +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 +#define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define 
CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 +#define CSR_VLENB 0xc22 +#define CSR_SSTATUS 0x100 +#define CSR_SEDELEG 0x102 +#define CSR_SIDELEG 0x103 +#define CSR_SIE 0x104 +#define CSR_STVEC 0x105 +#define CSR_SCOUNTEREN 0x106 +#define CSR_SENVCFG 0x10a +#define CSR_SSTATEEN0 0x10c +#define CSR_SSTATEEN1 0x10d +#define CSR_SSTATEEN2 0x10e +#define CSR_SSTATEEN3 0x10f +#define CSR_SSCRATCH 0x140 +#define CSR_SEPC 0x141 +#define CSR_SCAUSE 0x142 +#define CSR_STVAL 0x143 +#define CSR_SIP 0x144 +#define CSR_STIMECMP 0x14d +#define CSR_SISELECT 0x150 +#define CSR_SIREG 0x151 +#define CSR_STOPEI 0x15c +#define CSR_SATP 0x180 +#define CSR_SCONTEXT 0x5a8 +#define CSR_VSSTATUS 0x200 +#define CSR_VSIE 0x204 +#define CSR_VSTVEC 0x205 +#define CSR_VSSCRATCH 0x240 +#define CSR_VSEPC 0x241 +#define CSR_VSCAUSE 0x242 +#define CSR_VSTVAL 0x243 +#define CSR_VSIP 0x244 +#define CSR_VSTIMECMP 0x24d +#define CSR_VSISELECT 0x250 +#define CSR_VSIREG 0x251 +#define CSR_VSTOPEI 0x25c +#define CSR_VSATP 0x280 +#define CSR_HSTATUS 0x600 +#define CSR_HEDELEG 0x602 +#define CSR_HIDELEG 0x603 +#define CSR_HIE 0x604 +#define CSR_HTIMEDELTA 0x605 +#define CSR_HCOUNTEREN 0x606 +#define CSR_HGEIE 0x607 +#define CSR_HVIEN 0x608 +#define CSR_HVICTL 0x609 +#define CSR_HENVCFG 0x60a +#define CSR_HSTATEEN0 0x60c +#define CSR_HSTATEEN1 0x60d +#define CSR_HSTATEEN2 0x60e +#define CSR_HSTATEEN3 0x60f +#define CSR_HTVAL 0x643 +#define CSR_HIP 0x644 +#define CSR_HVIP 0x645 +#define CSR_HVIPRIO1 0x646 +#define CSR_HVIPRIO2 0x647 +#define CSR_HTINST 0x64a +#define CSR_HGATP 0x680 +#define CSR_HCONTEXT 0x6a8 +#define CSR_HGEIP 0xe12 +#define CSR_VSTOPI 0xeb0 +#define CSR_SCOUNTOVF 0xda0 +#define CSR_STOPI 0xdb0 +#define CSR_UTVT 0x7 +#define CSR_UNXTI 0x45 +#define CSR_UINTSTATUS 0x46 +#define CSR_USCRATCHCSW 0x48 +#define CSR_USCRATCHCSWL 0x49 +#define CSR_STVT 0x107 +#define CSR_SNXTI 0x145 +#define CSR_SINTSTATUS 0x146 +#define CSR_SSCRATCHCSW 0x148 +#define CSR_SSCRATCHCSWL 0x149 +#define CSR_MTVT 0x307 +#define CSR_MNXTI 0x345 +#define CSR_MINTSTATUS 0x346 +#define CSR_MSCRATCHCSW 0x348 +#define CSR_MSCRATCHCSWL 0x349 +#define CSR_MSTATUS 0x300 +#define CSR_MISA 0x301 +#define CSR_MEDELEG 0x302 +#define CSR_MIDELEG 0x303 +#define CSR_MIE 0x304 +#define CSR_MTVEC 0x305 +#define CSR_MCOUNTEREN 0x306 +#define CSR_MVIEN 0x308 +#define CSR_MVIP 0x309 +#define CSR_MENVCFG 0x30a +#define CSR_MSTATEEN0 0x30c +#define CSR_MSTATEEN1 0x30d +#define CSR_MSTATEEN2 0x30e +#define CSR_MSTATEEN3 0x30f +#define CSR_MCOUNTINHIBIT 0x320 +#define CSR_MSCRATCH 0x340 +#define CSR_MEPC 0x341 +#define CSR_MCAUSE 0x342 +#define CSR_MTVAL 0x343 +#define CSR_MIP 0x344 +#define CSR_MTINST 0x34a +#define CSR_MTVAL2 0x34b +#define CSR_MISELECT 0x350 +#define CSR_MIREG 0x351 +#define CSR_MTOPEI 0x35c +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPCFG1 0x3a1 +#define CSR_PMPCFG2 0x3a2 +#define CSR_PMPCFG3 0x3a3 +#define CSR_PMPCFG4 0x3a4 +#define CSR_PMPCFG5 0x3a5 +#define CSR_PMPCFG6 0x3a6 +#define 
CSR_PMPCFG7 0x3a7 +#define CSR_PMPCFG8 0x3a8 +#define CSR_PMPCFG9 0x3a9 +#define CSR_PMPCFG10 0x3aa +#define CSR_PMPCFG11 0x3ab +#define CSR_PMPCFG12 0x3ac +#define CSR_PMPCFG13 0x3ad +#define CSR_PMPCFG14 0x3ae +#define CSR_PMPCFG15 0x3af +#define CSR_PMPADDR0 0x3b0 +#define CSR_PMPADDR1 0x3b1 +#define CSR_PMPADDR2 0x3b2 +#define CSR_PMPADDR3 0x3b3 +#define CSR_PMPADDR4 0x3b4 +#define CSR_PMPADDR5 0x3b5 +#define CSR_PMPADDR6 0x3b6 +#define CSR_PMPADDR7 0x3b7 +#define CSR_PMPADDR8 0x3b8 +#define CSR_PMPADDR9 0x3b9 +#define CSR_PMPADDR10 0x3ba +#define CSR_PMPADDR11 0x3bb +#define CSR_PMPADDR12 0x3bc +#define CSR_PMPADDR13 0x3bd +#define CSR_PMPADDR14 0x3be +#define CSR_PMPADDR15 0x3bf +#define CSR_PMPADDR16 0x3c0 +#define CSR_PMPADDR17 0x3c1 +#define CSR_PMPADDR18 0x3c2 +#define CSR_PMPADDR19 0x3c3 +#define CSR_PMPADDR20 0x3c4 +#define CSR_PMPADDR21 0x3c5 +#define CSR_PMPADDR22 0x3c6 +#define CSR_PMPADDR23 0x3c7 +#define CSR_PMPADDR24 0x3c8 +#define CSR_PMPADDR25 0x3c9 +#define CSR_PMPADDR26 0x3ca +#define CSR_PMPADDR27 0x3cb +#define CSR_PMPADDR28 0x3cc +#define CSR_PMPADDR29 0x3cd +#define CSR_PMPADDR30 0x3ce +#define CSR_PMPADDR31 0x3cf +#define CSR_PMPADDR32 0x3d0 +#define CSR_PMPADDR33 0x3d1 +#define CSR_PMPADDR34 0x3d2 +#define CSR_PMPADDR35 0x3d3 +#define CSR_PMPADDR36 0x3d4 +#define CSR_PMPADDR37 0x3d5 +#define CSR_PMPADDR38 0x3d6 +#define CSR_PMPADDR39 0x3d7 +#define CSR_PMPADDR40 0x3d8 +#define CSR_PMPADDR41 0x3d9 +#define CSR_PMPADDR42 0x3da +#define CSR_PMPADDR43 0x3db +#define CSR_PMPADDR44 0x3dc +#define CSR_PMPADDR45 0x3dd +#define CSR_PMPADDR46 0x3de +#define CSR_PMPADDR47 0x3df +#define CSR_PMPADDR48 0x3e0 +#define CSR_PMPADDR49 0x3e1 +#define CSR_PMPADDR50 0x3e2 +#define CSR_PMPADDR51 0x3e3 +#define CSR_PMPADDR52 0x3e4 +#define CSR_PMPADDR53 0x3e5 +#define CSR_PMPADDR54 0x3e6 +#define CSR_PMPADDR55 0x3e7 +#define CSR_PMPADDR56 0x3e8 +#define CSR_PMPADDR57 0x3e9 +#define CSR_PMPADDR58 0x3ea +#define CSR_PMPADDR59 0x3eb +#define CSR_PMPADDR60 0x3ec +#define CSR_PMPADDR61 0x3ed +#define CSR_PMPADDR62 0x3ee +#define CSR_PMPADDR63 0x3ef +#define CSR_MSECCFG 0x747 +#define CSR_TSELECT 0x7a0 +#define CSR_TDATA1 0x7a1 +#define CSR_TDATA2 0x7a2 +#define CSR_TDATA3 0x7a3 +#define CSR_TINFO 0x7a4 +#define CSR_TCONTROL 0x7a5 +#define CSR_MCONTEXT 0x7a8 +#define CSR_MSCONTEXT 0x7aa +#define CSR_DCSR 0x7b0 +#define CSR_DPC 0x7b1 +#define CSR_DSCRATCH0 0x7b2 +#define CSR_DSCRATCH1 0x7b3 +#define CSR_MCYCLE 0xb00 +#define CSR_MINSTRET 0xb02 +#define CSR_MHPMCOUNTER3 0xb03 +#define CSR_MHPMCOUNTER4 0xb04 +#define CSR_MHPMCOUNTER5 0xb05 +#define CSR_MHPMCOUNTER6 0xb06 +#define CSR_MHPMCOUNTER7 0xb07 +#define CSR_MHPMCOUNTER8 0xb08 +#define CSR_MHPMCOUNTER9 0xb09 +#define CSR_MHPMCOUNTER10 0xb0a +#define CSR_MHPMCOUNTER11 0xb0b +#define CSR_MHPMCOUNTER12 0xb0c +#define CSR_MHPMCOUNTER13 0xb0d +#define CSR_MHPMCOUNTER14 0xb0e +#define CSR_MHPMCOUNTER15 0xb0f +#define CSR_MHPMCOUNTER16 0xb10 +#define CSR_MHPMCOUNTER17 0xb11 +#define CSR_MHPMCOUNTER18 0xb12 +#define CSR_MHPMCOUNTER19 0xb13 +#define CSR_MHPMCOUNTER20 0xb14 +#define CSR_MHPMCOUNTER21 0xb15 +#define CSR_MHPMCOUNTER22 0xb16 +#define CSR_MHPMCOUNTER23 0xb17 +#define CSR_MHPMCOUNTER24 0xb18 +#define CSR_MHPMCOUNTER25 0xb19 +#define CSR_MHPMCOUNTER26 0xb1a +#define CSR_MHPMCOUNTER27 0xb1b +#define CSR_MHPMCOUNTER28 0xb1c +#define CSR_MHPMCOUNTER29 0xb1d +#define CSR_MHPMCOUNTER30 0xb1e +#define CSR_MHPMCOUNTER31 0xb1f +#define CSR_MHPMEVENT3 0x323 +#define CSR_MHPMEVENT4 0x324 +#define CSR_MHPMEVENT5 0x325 +#define CSR_MHPMEVENT6 
0x326 +#define CSR_MHPMEVENT7 0x327 +#define CSR_MHPMEVENT8 0x328 +#define CSR_MHPMEVENT9 0x329 +#define CSR_MHPMEVENT10 0x32a +#define CSR_MHPMEVENT11 0x32b +#define CSR_MHPMEVENT12 0x32c +#define CSR_MHPMEVENT13 0x32d +#define CSR_MHPMEVENT14 0x32e +#define CSR_MHPMEVENT15 0x32f +#define CSR_MHPMEVENT16 0x330 +#define CSR_MHPMEVENT17 0x331 +#define CSR_MHPMEVENT18 0x332 +#define CSR_MHPMEVENT19 0x333 +#define CSR_MHPMEVENT20 0x334 +#define CSR_MHPMEVENT21 0x335 +#define CSR_MHPMEVENT22 0x336 +#define CSR_MHPMEVENT23 0x337 +#define CSR_MHPMEVENT24 0x338 +#define CSR_MHPMEVENT25 0x339 +#define CSR_MHPMEVENT26 0x33a +#define CSR_MHPMEVENT27 0x33b +#define CSR_MHPMEVENT28 0x33c +#define CSR_MHPMEVENT29 0x33d +#define CSR_MHPMEVENT30 0x33e +#define CSR_MHPMEVENT31 0x33f +#define CSR_MVENDORID 0xf11 +#define CSR_MARCHID 0xf12 +#define CSR_MIMPID 0xf13 +#define CSR_MHARTID 0xf14 +#define CSR_MCONFIGPTR 0xf15 +#define CSR_MTOPI 0xfb0 +#define CSR_SIEH 0x114 +#define CSR_SIPH 0x154 +#define CSR_STIMECMPH 0x15d +#define CSR_VSIEH 0x214 +#define CSR_VSIPH 0x254 +#define CSR_VSTIMECMPH 0x25d +#define CSR_HTIMEDELTAH 0x615 +#define CSR_HIDELEGH 0x613 +#define CSR_HVIENH 0x618 +#define CSR_HENVCFGH 0x61a +#define CSR_HVIPH 0x655 +#define CSR_HVIPRIO1H 0x656 +#define CSR_HVIPRIO2H 0x657 +#define CSR_HSTATEEN0H 0x61c +#define CSR_HSTATEEN1H 0x61d +#define CSR_HSTATEEN2H 0x61e +#define CSR_HSTATEEN3H 0x61f +#define CSR_CYCLEH 0xc80 +#define CSR_TIMEH 0xc81 +#define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f +#define CSR_MSTATUSH 0x310 +#define CSR_MIDELEGH 0x313 +#define CSR_MIEH 0x314 +#define CSR_MVIENH 0x318 +#define CSR_MVIPH 0x319 +#define CSR_MENVCFGH 0x31a +#define CSR_MSTATEEN0H 0x31c +#define CSR_MSTATEEN1H 0x31d +#define CSR_MSTATEEN2H 0x31e +#define CSR_MSTATEEN3H 0x31f +#define CSR_MIPH 0x354 +#define CSR_MHPMEVENT3H 0x723 +#define CSR_MHPMEVENT4H 0x724 +#define CSR_MHPMEVENT5H 0x725 +#define CSR_MHPMEVENT6H 0x726 +#define CSR_MHPMEVENT7H 0x727 +#define CSR_MHPMEVENT8H 0x728 +#define CSR_MHPMEVENT9H 0x729 +#define CSR_MHPMEVENT10H 0x72a +#define CSR_MHPMEVENT11H 0x72b +#define CSR_MHPMEVENT12H 0x72c +#define CSR_MHPMEVENT13H 0x72d +#define CSR_MHPMEVENT14H 0x72e +#define CSR_MHPMEVENT15H 0x72f +#define CSR_MHPMEVENT16H 0x730 +#define CSR_MHPMEVENT17H 0x731 +#define CSR_MHPMEVENT18H 0x732 +#define CSR_MHPMEVENT19H 0x733 +#define CSR_MHPMEVENT20H 0x734 +#define CSR_MHPMEVENT21H 0x735 +#define CSR_MHPMEVENT22H 0x736 +#define CSR_MHPMEVENT23H 0x737 +#define CSR_MHPMEVENT24H 0x738 +#define CSR_MHPMEVENT25H 0x739 +#define CSR_MHPMEVENT26H 0x73a +#define CSR_MHPMEVENT27H 0x73b +#define 
CSR_MHPMEVENT28H 0x73c +#define CSR_MHPMEVENT29H 0x73d +#define CSR_MHPMEVENT30H 0x73e +#define CSR_MHPMEVENT31H 0x73f +#define CSR_MNSCRATCH 0x740 +#define CSR_MNEPC 0x741 +#define CSR_MNCAUSE 0x742 +#define CSR_MNSTATUS 0x744 +#define CSR_MSECCFGH 0x757 +#define CSR_MCYCLEH 0xb80 +#define CSR_MINSTRETH 0xb82 +#define CSR_MHPMCOUNTER3H 0xb83 +#define CSR_MHPMCOUNTER4H 0xb84 +#define CSR_MHPMCOUNTER5H 0xb85 +#define CSR_MHPMCOUNTER6H 0xb86 +#define CSR_MHPMCOUNTER7H 0xb87 +#define CSR_MHPMCOUNTER8H 0xb88 +#define CSR_MHPMCOUNTER9H 0xb89 +#define CSR_MHPMCOUNTER10H 0xb8a +#define CSR_MHPMCOUNTER11H 0xb8b +#define CSR_MHPMCOUNTER12H 0xb8c +#define CSR_MHPMCOUNTER13H 0xb8d +#define CSR_MHPMCOUNTER14H 0xb8e +#define CSR_MHPMCOUNTER15H 0xb8f +#define CSR_MHPMCOUNTER16H 0xb90 +#define CSR_MHPMCOUNTER17H 0xb91 +#define CSR_MHPMCOUNTER18H 0xb92 +#define CSR_MHPMCOUNTER19H 0xb93 +#define CSR_MHPMCOUNTER20H 0xb94 +#define CSR_MHPMCOUNTER21H 0xb95 +#define CSR_MHPMCOUNTER22H 0xb96 +#define CSR_MHPMCOUNTER23H 0xb97 +#define CSR_MHPMCOUNTER24H 0xb98 +#define CSR_MHPMCOUNTER25H 0xb99 +#define CSR_MHPMCOUNTER26H 0xb9a +#define CSR_MHPMCOUNTER27H 0xb9b +#define CSR_MHPMCOUNTER28H 0xb9c +#define CSR_MHPMCOUNTER29H 0xb9d +#define CSR_MHPMCOUNTER30H 0xb9e +#define CSR_MHPMCOUNTER31H 0xb9f + +#define CAUSE_MISALIGNED_FETCH 0x0 +#define CAUSE_FETCH_ACCESS 0x1 +#define CAUSE_ILLEGAL_INSTRUCTION 0x2 +#define CAUSE_BREAKPOINT 0x3 +#define CAUSE_MISALIGNED_LOAD 0x4 +#define CAUSE_LOAD_ACCESS 0x5 +#define CAUSE_MISALIGNED_STORE 0x6 +#define CAUSE_STORE_ACCESS 0x7 +#define CAUSE_USER_ECALL 0x8 +#define CAUSE_SUPERVISOR_ECALL 0x9 +#define CAUSE_VIRTUAL_SUPERVISOR_ECALL 0xa +#define CAUSE_MACHINE_ECALL 0xb +#define CAUSE_FETCH_PAGE_FAULT 0xc +#define CAUSE_LOAD_PAGE_FAULT 0xd +#define CAUSE_STORE_PAGE_FAULT 0xf +#define CAUSE_FETCH_GUEST_PAGE_FAULT 0x14 +#define CAUSE_LOAD_GUEST_PAGE_FAULT 0x15 +#define CAUSE_VIRTUAL_INSTRUCTION 0x16 +#define CAUSE_STORE_GUEST_PAGE_FAULT 0x17 + +#define INSN_FIELD_RD 0xf80 +#define INSN_FIELD_RT 0xf8000 +#define INSN_FIELD_RS1 0xf8000 +#define INSN_FIELD_RS2 0x1f00000 +#define INSN_FIELD_RS3 0xf8000000 +#define INSN_FIELD_AQRL 0x6000000 +#define INSN_FIELD_AQ 0x4000000 +#define INSN_FIELD_RL 0x2000000 +#define INSN_FIELD_FM 0xf0000000 +#define INSN_FIELD_PRED 0xf000000 +#define INSN_FIELD_SUCC 0xf00000 +#define INSN_FIELD_RM 0x7000 +#define INSN_FIELD_FUNCT3 0x7000 +#define INSN_FIELD_FUNCT2 0x6000000 +#define INSN_FIELD_IMM20 0xfffff000 +#define INSN_FIELD_JIMM20 0xfffff000 +#define INSN_FIELD_IMM12 0xfff00000 +#define INSN_FIELD_CSR 0xfff00000 +#define INSN_FIELD_IMM12HI 0xfe000000 +#define INSN_FIELD_BIMM12HI 0xfe000000 +#define INSN_FIELD_IMM12LO 0xf80 +#define INSN_FIELD_BIMM12LO 0xf80 +#define INSN_FIELD_ZIMM 0xf8000 +#define INSN_FIELD_SHAMTQ 0x7f00000 +#define INSN_FIELD_SHAMTW 0x1f00000 +#define INSN_FIELD_SHAMTW4 0xf00000 +#define INSN_FIELD_SHAMTD 0x3f00000 +#define INSN_FIELD_BS 0xc0000000 +#define INSN_FIELD_RNUM 0xf00000 +#define INSN_FIELD_RC 0x3e000000 +#define INSN_FIELD_IMM2 0x300000 +#define INSN_FIELD_IMM3 0x700000 +#define INSN_FIELD_IMM4 0xf00000 +#define INSN_FIELD_IMM5 0x1f00000 +#define INSN_FIELD_IMM6 0x3f00000 +#define INSN_FIELD_OPCODE 0x7f +#define INSN_FIELD_FUNCT7 0xfe000000 +#define INSN_FIELD_VD 0xf80 +#define INSN_FIELD_VS3 0xf80 +#define INSN_FIELD_VS1 0xf8000 +#define INSN_FIELD_VS2 0x1f00000 +#define INSN_FIELD_VM 0x2000000 +#define INSN_FIELD_WD 0x4000000 +#define INSN_FIELD_AMOOP 0xf8000000 +#define INSN_FIELD_NF 0xe0000000 +#define 
INSN_FIELD_SIMM5 0xf8000 +#define INSN_FIELD_ZIMM10 0x3ff00000 +#define INSN_FIELD_ZIMM11 0x7ff00000 +#define INSN_FIELD_C_NZUIMM10 0x1fe0 +#define INSN_FIELD_C_UIMM7LO 0x60 +#define INSN_FIELD_C_UIMM7HI 0x1c00 +#define INSN_FIELD_C_UIMM8LO 0x60 +#define INSN_FIELD_C_UIMM8HI 0x1c00 +#define INSN_FIELD_C_UIMM9LO 0x60 +#define INSN_FIELD_C_UIMM9HI 0x1c00 +#define INSN_FIELD_C_NZIMM6LO 0x7c +#define INSN_FIELD_C_NZIMM6HI 0x1000 +#define INSN_FIELD_C_IMM6LO 0x7c +#define INSN_FIELD_C_IMM6HI 0x1000 +#define INSN_FIELD_C_NZIMM10HI 0x1000 +#define INSN_FIELD_C_NZIMM10LO 0x7c +#define INSN_FIELD_C_NZIMM18HI 0x1000 +#define INSN_FIELD_C_NZIMM18LO 0x7c +#define INSN_FIELD_C_IMM12 0x1ffc +#define INSN_FIELD_C_BIMM9LO 0x7c +#define INSN_FIELD_C_BIMM9HI 0x1c00 +#define INSN_FIELD_C_NZUIMM5 0x7c +#define INSN_FIELD_C_NZUIMM6LO 0x7c +#define INSN_FIELD_C_NZUIMM6HI 0x1000 +#define INSN_FIELD_C_UIMM8SPLO 0x7c +#define INSN_FIELD_C_UIMM8SPHI 0x1000 +#define INSN_FIELD_C_UIMM8SP_S 0x1f80 +#define INSN_FIELD_C_UIMM10SPLO 0x7c +#define INSN_FIELD_C_UIMM10SPHI 0x1000 +#define INSN_FIELD_C_UIMM9SPLO 0x7c +#define INSN_FIELD_C_UIMM9SPHI 0x1000 +#define INSN_FIELD_C_UIMM10SP_S 0x1f80 +#define INSN_FIELD_C_UIMM9SP_S 0x1f80 +#define INSN_FIELD_C_UIMM2 0x60 +#define INSN_FIELD_C_UIMM1 0x20 +#define INSN_FIELD_C_RLIST 0xf0 +#define INSN_FIELD_C_SPIMM 0xc +#define INSN_FIELD_C_INDEX 0x3fc +#define INSN_FIELD_RS1_P 0x380 +#define INSN_FIELD_RS2_P 0x1c +#define INSN_FIELD_RD_P 0x1c +#define INSN_FIELD_RD_RS1_N0 0xf80 +#define INSN_FIELD_RD_RS1_P 0x380 +#define INSN_FIELD_RD_RS1 0xf80 +#define INSN_FIELD_RD_N2 0xf80 +#define INSN_FIELD_RD_N0 0xf80 +#define INSN_FIELD_RS1_N0 0xf80 +#define INSN_FIELD_C_RS2_N0 0x7c +#define INSN_FIELD_C_RS1_N0 0xf80 +#define INSN_FIELD_C_RS2 0x7c +#define INSN_FIELD_C_SREG1 0x380 +#define INSN_FIELD_C_SREG2 0x1c +#endif +#ifdef DECLARE_INSN +DECLARE_INSN(add, MATCH_ADD, MASK_ADD) +DECLARE_INSN(add16, MATCH_ADD16, MASK_ADD16) +DECLARE_INSN(add32, MATCH_ADD32, MASK_ADD32) +DECLARE_INSN(add64, MATCH_ADD64, MASK_ADD64) +DECLARE_INSN(add8, MATCH_ADD8, MASK_ADD8) +DECLARE_INSN(add_uw, MATCH_ADD_UW, MASK_ADD_UW) +DECLARE_INSN(addi, MATCH_ADDI, MASK_ADDI) +DECLARE_INSN(addiw, MATCH_ADDIW, MASK_ADDIW) +DECLARE_INSN(addw, MATCH_ADDW, MASK_ADDW) +DECLARE_INSN(aes32dsi, MATCH_AES32DSI, MASK_AES32DSI) +DECLARE_INSN(aes32dsmi, MATCH_AES32DSMI, MASK_AES32DSMI) +DECLARE_INSN(aes32esi, MATCH_AES32ESI, MASK_AES32ESI) +DECLARE_INSN(aes32esmi, MATCH_AES32ESMI, MASK_AES32ESMI) +DECLARE_INSN(aes64ds, MATCH_AES64DS, MASK_AES64DS) +DECLARE_INSN(aes64dsm, MATCH_AES64DSM, MASK_AES64DSM) +DECLARE_INSN(aes64es, MATCH_AES64ES, MASK_AES64ES) +DECLARE_INSN(aes64esm, MATCH_AES64ESM, MASK_AES64ESM) +DECLARE_INSN(aes64im, MATCH_AES64IM, MASK_AES64IM) +DECLARE_INSN(aes64ks1i, MATCH_AES64KS1I, MASK_AES64KS1I) +DECLARE_INSN(aes64ks2, MATCH_AES64KS2, MASK_AES64KS2) +DECLARE_INSN(amoadd_d, MATCH_AMOADD_D, MASK_AMOADD_D) +DECLARE_INSN(amoadd_w, MATCH_AMOADD_W, MASK_AMOADD_W) +DECLARE_INSN(amoand_d, MATCH_AMOAND_D, MASK_AMOAND_D) +DECLARE_INSN(amoand_w, MATCH_AMOAND_W, MASK_AMOAND_W) +DECLARE_INSN(amomax_d, MATCH_AMOMAX_D, MASK_AMOMAX_D) +DECLARE_INSN(amomax_w, MATCH_AMOMAX_W, MASK_AMOMAX_W) +DECLARE_INSN(amomaxu_d, MATCH_AMOMAXU_D, MASK_AMOMAXU_D) +DECLARE_INSN(amomaxu_w, MATCH_AMOMAXU_W, MASK_AMOMAXU_W) +DECLARE_INSN(amomin_d, MATCH_AMOMIN_D, MASK_AMOMIN_D) +DECLARE_INSN(amomin_w, MATCH_AMOMIN_W, MASK_AMOMIN_W) +DECLARE_INSN(amominu_d, MATCH_AMOMINU_D, MASK_AMOMINU_D) +DECLARE_INSN(amominu_w, MATCH_AMOMINU_W, MASK_AMOMINU_W) 
+DECLARE_INSN(amoor_d, MATCH_AMOOR_D, MASK_AMOOR_D) +DECLARE_INSN(amoor_w, MATCH_AMOOR_W, MASK_AMOOR_W) +DECLARE_INSN(amoswap_d, MATCH_AMOSWAP_D, MASK_AMOSWAP_D) +DECLARE_INSN(amoswap_w, MATCH_AMOSWAP_W, MASK_AMOSWAP_W) +DECLARE_INSN(amoxor_d, MATCH_AMOXOR_D, MASK_AMOXOR_D) +DECLARE_INSN(amoxor_w, MATCH_AMOXOR_W, MASK_AMOXOR_W) +DECLARE_INSN(and, MATCH_AND, MASK_AND) +DECLARE_INSN(andi, MATCH_ANDI, MASK_ANDI) +DECLARE_INSN(andn, MATCH_ANDN, MASK_ANDN) +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) +DECLARE_INSN(ave, MATCH_AVE, MASK_AVE) +DECLARE_INSN(bclr, MATCH_BCLR, MASK_BCLR) +DECLARE_INSN(bclri, MATCH_BCLRI, MASK_BCLRI) +DECLARE_INSN(bcompress, MATCH_BCOMPRESS, MASK_BCOMPRESS) +DECLARE_INSN(bcompressw, MATCH_BCOMPRESSW, MASK_BCOMPRESSW) +DECLARE_INSN(bdecompress, MATCH_BDECOMPRESS, MASK_BDECOMPRESS) +DECLARE_INSN(bdecompressw, MATCH_BDECOMPRESSW, MASK_BDECOMPRESSW) +DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ) +DECLARE_INSN(bext, MATCH_BEXT, MASK_BEXT) +DECLARE_INSN(bexti, MATCH_BEXTI, MASK_BEXTI) +DECLARE_INSN(bfp, MATCH_BFP, MASK_BFP) +DECLARE_INSN(bfpw, MATCH_BFPW, MASK_BFPW) +DECLARE_INSN(bge, MATCH_BGE, MASK_BGE) +DECLARE_INSN(bgeu, MATCH_BGEU, MASK_BGEU) +DECLARE_INSN(binv, MATCH_BINV, MASK_BINV) +DECLARE_INSN(binvi, MATCH_BINVI, MASK_BINVI) +DECLARE_INSN(blt, MATCH_BLT, MASK_BLT) +DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU) +DECLARE_INSN(bmatflip, MATCH_BMATFLIP, MASK_BMATFLIP) +DECLARE_INSN(bmator, MATCH_BMATOR, MASK_BMATOR) +DECLARE_INSN(bmatxor, MATCH_BMATXOR, MASK_BMATXOR) +DECLARE_INSN(bne, MATCH_BNE, MASK_BNE) +DECLARE_INSN(bset, MATCH_BSET, MASK_BSET) +DECLARE_INSN(bseti, MATCH_BSETI, MASK_BSETI) +DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) +DECLARE_INSN(c_addi, MATCH_C_ADDI, MASK_C_ADDI) +DECLARE_INSN(c_addi16sp, MATCH_C_ADDI16SP, MASK_C_ADDI16SP) +DECLARE_INSN(c_addi4spn, MATCH_C_ADDI4SPN, MASK_C_ADDI4SPN) +DECLARE_INSN(c_addiw, MATCH_C_ADDIW, MASK_C_ADDIW) +DECLARE_INSN(c_addw, MATCH_C_ADDW, MASK_C_ADDW) +DECLARE_INSN(c_and, MATCH_C_AND, MASK_C_AND) +DECLARE_INSN(c_andi, MATCH_C_ANDI, MASK_C_ANDI) +DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) +DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) +DECLARE_INSN(c_ebreak, MATCH_C_EBREAK, MASK_C_EBREAK) +DECLARE_INSN(c_fld, MATCH_C_FLD, MASK_C_FLD) +DECLARE_INSN(c_fldsp, MATCH_C_FLDSP, MASK_C_FLDSP) +DECLARE_INSN(c_flw, MATCH_C_FLW, MASK_C_FLW) +DECLARE_INSN(c_flwsp, MATCH_C_FLWSP, MASK_C_FLWSP) +DECLARE_INSN(c_fsd, MATCH_C_FSD, MASK_C_FSD) +DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) +DECLARE_INSN(c_fsw, MATCH_C_FSW, MASK_C_FSW) +DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) +DECLARE_INSN(c_j, MATCH_C_J, MASK_C_J) +DECLARE_INSN(c_jal, MATCH_C_JAL, MASK_C_JAL) +DECLARE_INSN(c_jalr, MATCH_C_JALR, MASK_C_JALR) +DECLARE_INSN(c_jr, MATCH_C_JR, MASK_C_JR) +DECLARE_INSN(c_lbu, MATCH_C_LBU, MASK_C_LBU) +DECLARE_INSN(c_ld, MATCH_C_LD, MASK_C_LD) +DECLARE_INSN(c_ldsp, MATCH_C_LDSP, MASK_C_LDSP) +DECLARE_INSN(c_lh, MATCH_C_LH, MASK_C_LH) +DECLARE_INSN(c_lhu, MATCH_C_LHU, MASK_C_LHU) +DECLARE_INSN(c_li, MATCH_C_LI, MASK_C_LI) +DECLARE_INSN(c_lui, MATCH_C_LUI, MASK_C_LUI) +DECLARE_INSN(c_lw, MATCH_C_LW, MASK_C_LW) +DECLARE_INSN(c_lwsp, MATCH_C_LWSP, MASK_C_LWSP) +DECLARE_INSN(c_mul, MATCH_C_MUL, MASK_C_MUL) +DECLARE_INSN(c_mv, MATCH_C_MV, MASK_C_MV) +DECLARE_INSN(c_nop, MATCH_C_NOP, MASK_C_NOP) +DECLARE_INSN(c_not, MATCH_C_NOT, MASK_C_NOT) +DECLARE_INSN(c_or, MATCH_C_OR, MASK_C_OR) +DECLARE_INSN(c_sb, MATCH_C_SB, MASK_C_SB) +DECLARE_INSN(c_sd, MATCH_C_SD, MASK_C_SD) +DECLARE_INSN(c_sdsp, MATCH_C_SDSP, MASK_C_SDSP) 
+DECLARE_INSN(c_sext_b, MATCH_C_SEXT_B, MASK_C_SEXT_B) +DECLARE_INSN(c_sext_h, MATCH_C_SEXT_H, MASK_C_SEXT_H) +DECLARE_INSN(c_sh, MATCH_C_SH, MASK_C_SH) +DECLARE_INSN(c_slli, MATCH_C_SLLI, MASK_C_SLLI) +DECLARE_INSN(c_srai, MATCH_C_SRAI, MASK_C_SRAI) +DECLARE_INSN(c_srli, MATCH_C_SRLI, MASK_C_SRLI) +DECLARE_INSN(c_sub, MATCH_C_SUB, MASK_C_SUB) +DECLARE_INSN(c_subw, MATCH_C_SUBW, MASK_C_SUBW) +DECLARE_INSN(c_sw, MATCH_C_SW, MASK_C_SW) +DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) +DECLARE_INSN(c_xor, MATCH_C_XOR, MASK_C_XOR) +DECLARE_INSN(c_zext_b, MATCH_C_ZEXT_B, MASK_C_ZEXT_B) +DECLARE_INSN(c_zext_h, MATCH_C_ZEXT_H, MASK_C_ZEXT_H) +DECLARE_INSN(c_zext_w, MATCH_C_ZEXT_W, MASK_C_ZEXT_W) +DECLARE_INSN(cbo_clean, MATCH_CBO_CLEAN, MASK_CBO_CLEAN) +DECLARE_INSN(cbo_flush, MATCH_CBO_FLUSH, MASK_CBO_FLUSH) +DECLARE_INSN(cbo_inval, MATCH_CBO_INVAL, MASK_CBO_INVAL) +DECLARE_INSN(cbo_zero, MATCH_CBO_ZERO, MASK_CBO_ZERO) +DECLARE_INSN(clmul, MATCH_CLMUL, MASK_CLMUL) +DECLARE_INSN(clmulh, MATCH_CLMULH, MASK_CLMULH) +DECLARE_INSN(clmulr, MATCH_CLMULR, MASK_CLMULR) +DECLARE_INSN(clrs16, MATCH_CLRS16, MASK_CLRS16) +DECLARE_INSN(clrs32, MATCH_CLRS32, MASK_CLRS32) +DECLARE_INSN(clrs8, MATCH_CLRS8, MASK_CLRS8) +DECLARE_INSN(clz, MATCH_CLZ, MASK_CLZ) +DECLARE_INSN(clz16, MATCH_CLZ16, MASK_CLZ16) +DECLARE_INSN(clz32, MATCH_CLZ32, MASK_CLZ32) +DECLARE_INSN(clz8, MATCH_CLZ8, MASK_CLZ8) +DECLARE_INSN(clzw, MATCH_CLZW, MASK_CLZW) +DECLARE_INSN(cm_jalt, MATCH_CM_JALT, MASK_CM_JALT) +DECLARE_INSN(cm_mva01s, MATCH_CM_MVA01S, MASK_CM_MVA01S) +DECLARE_INSN(cm_mvsa01, MATCH_CM_MVSA01, MASK_CM_MVSA01) +DECLARE_INSN(cm_pop, MATCH_CM_POP, MASK_CM_POP) +DECLARE_INSN(cm_popret, MATCH_CM_POPRET, MASK_CM_POPRET) +DECLARE_INSN(cm_popretz, MATCH_CM_POPRETZ, MASK_CM_POPRETZ) +DECLARE_INSN(cm_push, MATCH_CM_PUSH, MASK_CM_PUSH) +DECLARE_INSN(cmix, MATCH_CMIX, MASK_CMIX) +DECLARE_INSN(cmov, MATCH_CMOV, MASK_CMOV) +DECLARE_INSN(cmpeq16, MATCH_CMPEQ16, MASK_CMPEQ16) +DECLARE_INSN(cmpeq8, MATCH_CMPEQ8, MASK_CMPEQ8) +DECLARE_INSN(cpop, MATCH_CPOP, MASK_CPOP) +DECLARE_INSN(cpopw, MATCH_CPOPW, MASK_CPOPW) +DECLARE_INSN(cras16, MATCH_CRAS16, MASK_CRAS16) +DECLARE_INSN(cras32, MATCH_CRAS32, MASK_CRAS32) +DECLARE_INSN(crc32_b, MATCH_CRC32_B, MASK_CRC32_B) +DECLARE_INSN(crc32_d, MATCH_CRC32_D, MASK_CRC32_D) +DECLARE_INSN(crc32_h, MATCH_CRC32_H, MASK_CRC32_H) +DECLARE_INSN(crc32_w, MATCH_CRC32_W, MASK_CRC32_W) +DECLARE_INSN(crc32c_b, MATCH_CRC32C_B, MASK_CRC32C_B) +DECLARE_INSN(crc32c_d, MATCH_CRC32C_D, MASK_CRC32C_D) +DECLARE_INSN(crc32c_h, MATCH_CRC32C_H, MASK_CRC32C_H) +DECLARE_INSN(crc32c_w, MATCH_CRC32C_W, MASK_CRC32C_W) +DECLARE_INSN(crsa16, MATCH_CRSA16, MASK_CRSA16) +DECLARE_INSN(crsa32, MATCH_CRSA32, MASK_CRSA32) +DECLARE_INSN(csrrc, MATCH_CSRRC, MASK_CSRRC) +DECLARE_INSN(csrrci, MATCH_CSRRCI, MASK_CSRRCI) +DECLARE_INSN(csrrs, MATCH_CSRRS, MASK_CSRRS) +DECLARE_INSN(csrrsi, MATCH_CSRRSI, MASK_CSRRSI) +DECLARE_INSN(csrrw, MATCH_CSRRW, MASK_CSRRW) +DECLARE_INSN(csrrwi, MATCH_CSRRWI, MASK_CSRRWI) +DECLARE_INSN(ctz, MATCH_CTZ, MASK_CTZ) +DECLARE_INSN(ctzw, MATCH_CTZW, MASK_CTZW) +DECLARE_INSN(czero_eqz, MATCH_CZERO_EQZ, MASK_CZERO_EQZ) +DECLARE_INSN(czero_nez, MATCH_CZERO_NEZ, MASK_CZERO_NEZ) +DECLARE_INSN(div, MATCH_DIV, MASK_DIV) +DECLARE_INSN(divu, MATCH_DIVU, MASK_DIVU) +DECLARE_INSN(divuw, MATCH_DIVUW, MASK_DIVUW) +DECLARE_INSN(divw, MATCH_DIVW, MASK_DIVW) +DECLARE_INSN(dret, MATCH_DRET, MASK_DRET) +DECLARE_INSN(ebreak, MATCH_EBREAK, MASK_EBREAK) +DECLARE_INSN(ecall, MATCH_ECALL, MASK_ECALL) +DECLARE_INSN(fadd_d, MATCH_FADD_D, 
MASK_FADD_D) +DECLARE_INSN(fadd_h, MATCH_FADD_H, MASK_FADD_H) +DECLARE_INSN(fadd_q, MATCH_FADD_Q, MASK_FADD_Q) +DECLARE_INSN(fadd_s, MATCH_FADD_S, MASK_FADD_S) +DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D) +DECLARE_INSN(fclass_h, MATCH_FCLASS_H, MASK_FCLASS_H) +DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q) +DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S) +DECLARE_INSN(fcvt_d_h, MATCH_FCVT_D_H, MASK_FCVT_D_H) +DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L) +DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU) +DECLARE_INSN(fcvt_d_q, MATCH_FCVT_D_Q, MASK_FCVT_D_Q) +DECLARE_INSN(fcvt_d_s, MATCH_FCVT_D_S, MASK_FCVT_D_S) +DECLARE_INSN(fcvt_d_w, MATCH_FCVT_D_W, MASK_FCVT_D_W) +DECLARE_INSN(fcvt_d_wu, MATCH_FCVT_D_WU, MASK_FCVT_D_WU) +DECLARE_INSN(fcvt_h_d, MATCH_FCVT_H_D, MASK_FCVT_H_D) +DECLARE_INSN(fcvt_h_l, MATCH_FCVT_H_L, MASK_FCVT_H_L) +DECLARE_INSN(fcvt_h_lu, MATCH_FCVT_H_LU, MASK_FCVT_H_LU) +DECLARE_INSN(fcvt_h_q, MATCH_FCVT_H_Q, MASK_FCVT_H_Q) +DECLARE_INSN(fcvt_h_s, MATCH_FCVT_H_S, MASK_FCVT_H_S) +DECLARE_INSN(fcvt_h_w, MATCH_FCVT_H_W, MASK_FCVT_H_W) +DECLARE_INSN(fcvt_h_wu, MATCH_FCVT_H_WU, MASK_FCVT_H_WU) +DECLARE_INSN(fcvt_l_d, MATCH_FCVT_L_D, MASK_FCVT_L_D) +DECLARE_INSN(fcvt_l_h, MATCH_FCVT_L_H, MASK_FCVT_L_H) +DECLARE_INSN(fcvt_l_q, MATCH_FCVT_L_Q, MASK_FCVT_L_Q) +DECLARE_INSN(fcvt_l_s, MATCH_FCVT_L_S, MASK_FCVT_L_S) +DECLARE_INSN(fcvt_lu_d, MATCH_FCVT_LU_D, MASK_FCVT_LU_D) +DECLARE_INSN(fcvt_lu_h, MATCH_FCVT_LU_H, MASK_FCVT_LU_H) +DECLARE_INSN(fcvt_lu_q, MATCH_FCVT_LU_Q, MASK_FCVT_LU_Q) +DECLARE_INSN(fcvt_lu_s, MATCH_FCVT_LU_S, MASK_FCVT_LU_S) +DECLARE_INSN(fcvt_q_d, MATCH_FCVT_Q_D, MASK_FCVT_Q_D) +DECLARE_INSN(fcvt_q_h, MATCH_FCVT_Q_H, MASK_FCVT_Q_H) +DECLARE_INSN(fcvt_q_l, MATCH_FCVT_Q_L, MASK_FCVT_Q_L) +DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU) +DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S) +DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W) +DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU) +DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D) +DECLARE_INSN(fcvt_s_h, MATCH_FCVT_S_H, MASK_FCVT_S_H) +DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L) +DECLARE_INSN(fcvt_s_lu, MATCH_FCVT_S_LU, MASK_FCVT_S_LU) +DECLARE_INSN(fcvt_s_q, MATCH_FCVT_S_Q, MASK_FCVT_S_Q) +DECLARE_INSN(fcvt_s_w, MATCH_FCVT_S_W, MASK_FCVT_S_W) +DECLARE_INSN(fcvt_s_wu, MATCH_FCVT_S_WU, MASK_FCVT_S_WU) +DECLARE_INSN(fcvt_w_d, MATCH_FCVT_W_D, MASK_FCVT_W_D) +DECLARE_INSN(fcvt_w_h, MATCH_FCVT_W_H, MASK_FCVT_W_H) +DECLARE_INSN(fcvt_w_q, MATCH_FCVT_W_Q, MASK_FCVT_W_Q) +DECLARE_INSN(fcvt_w_s, MATCH_FCVT_W_S, MASK_FCVT_W_S) +DECLARE_INSN(fcvt_wu_d, MATCH_FCVT_WU_D, MASK_FCVT_WU_D) +DECLARE_INSN(fcvt_wu_h, MATCH_FCVT_WU_H, MASK_FCVT_WU_H) +DECLARE_INSN(fcvt_wu_q, MATCH_FCVT_WU_Q, MASK_FCVT_WU_Q) +DECLARE_INSN(fcvt_wu_s, MATCH_FCVT_WU_S, MASK_FCVT_WU_S) +DECLARE_INSN(fdiv_d, MATCH_FDIV_D, MASK_FDIV_D) +DECLARE_INSN(fdiv_h, MATCH_FDIV_H, MASK_FDIV_H) +DECLARE_INSN(fdiv_q, MATCH_FDIV_Q, MASK_FDIV_Q) +DECLARE_INSN(fdiv_s, MATCH_FDIV_S, MASK_FDIV_S) +DECLARE_INSN(fence, MATCH_FENCE, MASK_FENCE) +DECLARE_INSN(fence_i, MATCH_FENCE_I, MASK_FENCE_I) +DECLARE_INSN(feq_d, MATCH_FEQ_D, MASK_FEQ_D) +DECLARE_INSN(feq_h, MATCH_FEQ_H, MASK_FEQ_H) +DECLARE_INSN(feq_q, MATCH_FEQ_Q, MASK_FEQ_Q) +DECLARE_INSN(feq_s, MATCH_FEQ_S, MASK_FEQ_S) +DECLARE_INSN(fld, MATCH_FLD, MASK_FLD) +DECLARE_INSN(fle_d, MATCH_FLE_D, MASK_FLE_D) +DECLARE_INSN(fle_h, MATCH_FLE_H, MASK_FLE_H) +DECLARE_INSN(fle_q, MATCH_FLE_Q, MASK_FLE_Q) +DECLARE_INSN(fle_s, MATCH_FLE_S, MASK_FLE_S) 
+DECLARE_INSN(flh, MATCH_FLH, MASK_FLH) +DECLARE_INSN(flq, MATCH_FLQ, MASK_FLQ) +DECLARE_INSN(flt_d, MATCH_FLT_D, MASK_FLT_D) +DECLARE_INSN(flt_h, MATCH_FLT_H, MASK_FLT_H) +DECLARE_INSN(flt_q, MATCH_FLT_Q, MASK_FLT_Q) +DECLARE_INSN(flt_s, MATCH_FLT_S, MASK_FLT_S) +DECLARE_INSN(flw, MATCH_FLW, MASK_FLW) +DECLARE_INSN(fmadd_d, MATCH_FMADD_D, MASK_FMADD_D) +DECLARE_INSN(fmadd_h, MATCH_FMADD_H, MASK_FMADD_H) +DECLARE_INSN(fmadd_q, MATCH_FMADD_Q, MASK_FMADD_Q) +DECLARE_INSN(fmadd_s, MATCH_FMADD_S, MASK_FMADD_S) +DECLARE_INSN(fmax_d, MATCH_FMAX_D, MASK_FMAX_D) +DECLARE_INSN(fmax_h, MATCH_FMAX_H, MASK_FMAX_H) +DECLARE_INSN(fmax_q, MATCH_FMAX_Q, MASK_FMAX_Q) +DECLARE_INSN(fmax_s, MATCH_FMAX_S, MASK_FMAX_S) +DECLARE_INSN(fmin_d, MATCH_FMIN_D, MASK_FMIN_D) +DECLARE_INSN(fmin_h, MATCH_FMIN_H, MASK_FMIN_H) +DECLARE_INSN(fmin_q, MATCH_FMIN_Q, MASK_FMIN_Q) +DECLARE_INSN(fmin_s, MATCH_FMIN_S, MASK_FMIN_S) +DECLARE_INSN(fmsub_d, MATCH_FMSUB_D, MASK_FMSUB_D) +DECLARE_INSN(fmsub_h, MATCH_FMSUB_H, MASK_FMSUB_H) +DECLARE_INSN(fmsub_q, MATCH_FMSUB_Q, MASK_FMSUB_Q) +DECLARE_INSN(fmsub_s, MATCH_FMSUB_S, MASK_FMSUB_S) +DECLARE_INSN(fmul_d, MATCH_FMUL_D, MASK_FMUL_D) +DECLARE_INSN(fmul_h, MATCH_FMUL_H, MASK_FMUL_H) +DECLARE_INSN(fmul_q, MATCH_FMUL_Q, MASK_FMUL_Q) +DECLARE_INSN(fmul_s, MATCH_FMUL_S, MASK_FMUL_S) +DECLARE_INSN(fmv_d_x, MATCH_FMV_D_X, MASK_FMV_D_X) +DECLARE_INSN(fmv_h_x, MATCH_FMV_H_X, MASK_FMV_H_X) +DECLARE_INSN(fmv_w_x, MATCH_FMV_W_X, MASK_FMV_W_X) +DECLARE_INSN(fmv_x_d, MATCH_FMV_X_D, MASK_FMV_X_D) +DECLARE_INSN(fmv_x_h, MATCH_FMV_X_H, MASK_FMV_X_H) +DECLARE_INSN(fmv_x_w, MATCH_FMV_X_W, MASK_FMV_X_W) +DECLARE_INSN(fnmadd_d, MATCH_FNMADD_D, MASK_FNMADD_D) +DECLARE_INSN(fnmadd_h, MATCH_FNMADD_H, MASK_FNMADD_H) +DECLARE_INSN(fnmadd_q, MATCH_FNMADD_Q, MASK_FNMADD_Q) +DECLARE_INSN(fnmadd_s, MATCH_FNMADD_S, MASK_FNMADD_S) +DECLARE_INSN(fnmsub_d, MATCH_FNMSUB_D, MASK_FNMSUB_D) +DECLARE_INSN(fnmsub_h, MATCH_FNMSUB_H, MASK_FNMSUB_H) +DECLARE_INSN(fnmsub_q, MATCH_FNMSUB_Q, MASK_FNMSUB_Q) +DECLARE_INSN(fnmsub_s, MATCH_FNMSUB_S, MASK_FNMSUB_S) +DECLARE_INSN(fsd, MATCH_FSD, MASK_FSD) +DECLARE_INSN(fsgnj_d, MATCH_FSGNJ_D, MASK_FSGNJ_D) +DECLARE_INSN(fsgnj_h, MATCH_FSGNJ_H, MASK_FSGNJ_H) +DECLARE_INSN(fsgnj_q, MATCH_FSGNJ_Q, MASK_FSGNJ_Q) +DECLARE_INSN(fsgnj_s, MATCH_FSGNJ_S, MASK_FSGNJ_S) +DECLARE_INSN(fsgnjn_d, MATCH_FSGNJN_D, MASK_FSGNJN_D) +DECLARE_INSN(fsgnjn_h, MATCH_FSGNJN_H, MASK_FSGNJN_H) +DECLARE_INSN(fsgnjn_q, MATCH_FSGNJN_Q, MASK_FSGNJN_Q) +DECLARE_INSN(fsgnjn_s, MATCH_FSGNJN_S, MASK_FSGNJN_S) +DECLARE_INSN(fsgnjx_d, MATCH_FSGNJX_D, MASK_FSGNJX_D) +DECLARE_INSN(fsgnjx_h, MATCH_FSGNJX_H, MASK_FSGNJX_H) +DECLARE_INSN(fsgnjx_q, MATCH_FSGNJX_Q, MASK_FSGNJX_Q) +DECLARE_INSN(fsgnjx_s, MATCH_FSGNJX_S, MASK_FSGNJX_S) +DECLARE_INSN(fsh, MATCH_FSH, MASK_FSH) +DECLARE_INSN(fsl, MATCH_FSL, MASK_FSL) +DECLARE_INSN(fslw, MATCH_FSLW, MASK_FSLW) +DECLARE_INSN(fsq, MATCH_FSQ, MASK_FSQ) +DECLARE_INSN(fsqrt_d, MATCH_FSQRT_D, MASK_FSQRT_D) +DECLARE_INSN(fsqrt_h, MATCH_FSQRT_H, MASK_FSQRT_H) +DECLARE_INSN(fsqrt_q, MATCH_FSQRT_Q, MASK_FSQRT_Q) +DECLARE_INSN(fsqrt_s, MATCH_FSQRT_S, MASK_FSQRT_S) +DECLARE_INSN(fsr, MATCH_FSR, MASK_FSR) +DECLARE_INSN(fsri, MATCH_FSRI, MASK_FSRI) +DECLARE_INSN(fsriw, MATCH_FSRIW, MASK_FSRIW) +DECLARE_INSN(fsrw, MATCH_FSRW, MASK_FSRW) +DECLARE_INSN(fsub_d, MATCH_FSUB_D, MASK_FSUB_D) +DECLARE_INSN(fsub_h, MATCH_FSUB_H, MASK_FSUB_H) +DECLARE_INSN(fsub_q, MATCH_FSUB_Q, MASK_FSUB_Q) +DECLARE_INSN(fsub_s, MATCH_FSUB_S, MASK_FSUB_S) +DECLARE_INSN(fsw, MATCH_FSW, MASK_FSW) +DECLARE_INSN(gorc, 
MATCH_GORC, MASK_GORC) +DECLARE_INSN(gorci, MATCH_GORCI, MASK_GORCI) +DECLARE_INSN(gorciw, MATCH_GORCIW, MASK_GORCIW) +DECLARE_INSN(gorcw, MATCH_GORCW, MASK_GORCW) +DECLARE_INSN(grev, MATCH_GREV, MASK_GREV) +DECLARE_INSN(grevi, MATCH_GREVI, MASK_GREVI) +DECLARE_INSN(greviw, MATCH_GREVIW, MASK_GREVIW) +DECLARE_INSN(grevw, MATCH_GREVW, MASK_GREVW) +DECLARE_INSN(hfence_gvma, MATCH_HFENCE_GVMA, MASK_HFENCE_GVMA) +DECLARE_INSN(hfence_vvma, MATCH_HFENCE_VVMA, MASK_HFENCE_VVMA) +DECLARE_INSN(hinval_gvma, MATCH_HINVAL_GVMA, MASK_HINVAL_GVMA) +DECLARE_INSN(hinval_vvma, MATCH_HINVAL_VVMA, MASK_HINVAL_VVMA) +DECLARE_INSN(hlv_b, MATCH_HLV_B, MASK_HLV_B) +DECLARE_INSN(hlv_bu, MATCH_HLV_BU, MASK_HLV_BU) +DECLARE_INSN(hlv_d, MATCH_HLV_D, MASK_HLV_D) +DECLARE_INSN(hlv_h, MATCH_HLV_H, MASK_HLV_H) +DECLARE_INSN(hlv_hu, MATCH_HLV_HU, MASK_HLV_HU) +DECLARE_INSN(hlv_w, MATCH_HLV_W, MASK_HLV_W) +DECLARE_INSN(hlv_wu, MATCH_HLV_WU, MASK_HLV_WU) +DECLARE_INSN(hlvx_hu, MATCH_HLVX_HU, MASK_HLVX_HU) +DECLARE_INSN(hlvx_wu, MATCH_HLVX_WU, MASK_HLVX_WU) +DECLARE_INSN(hsv_b, MATCH_HSV_B, MASK_HSV_B) +DECLARE_INSN(hsv_d, MATCH_HSV_D, MASK_HSV_D) +DECLARE_INSN(hsv_h, MATCH_HSV_H, MASK_HSV_H) +DECLARE_INSN(hsv_w, MATCH_HSV_W, MASK_HSV_W) +DECLARE_INSN(insb, MATCH_INSB, MASK_INSB) +DECLARE_INSN(jal, MATCH_JAL, MASK_JAL) +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) +DECLARE_INSN(kabs16, MATCH_KABS16, MASK_KABS16) +DECLARE_INSN(kabs32, MATCH_KABS32, MASK_KABS32) +DECLARE_INSN(kabs8, MATCH_KABS8, MASK_KABS8) +DECLARE_INSN(kabsw, MATCH_KABSW, MASK_KABSW) +DECLARE_INSN(kadd16, MATCH_KADD16, MASK_KADD16) +DECLARE_INSN(kadd32, MATCH_KADD32, MASK_KADD32) +DECLARE_INSN(kadd64, MATCH_KADD64, MASK_KADD64) +DECLARE_INSN(kadd8, MATCH_KADD8, MASK_KADD8) +DECLARE_INSN(kaddh, MATCH_KADDH, MASK_KADDH) +DECLARE_INSN(kaddw, MATCH_KADDW, MASK_KADDW) +DECLARE_INSN(kcras16, MATCH_KCRAS16, MASK_KCRAS16) +DECLARE_INSN(kcras32, MATCH_KCRAS32, MASK_KCRAS32) +DECLARE_INSN(kcrsa16, MATCH_KCRSA16, MASK_KCRSA16) +DECLARE_INSN(kcrsa32, MATCH_KCRSA32, MASK_KCRSA32) +DECLARE_INSN(kdmabb, MATCH_KDMABB, MASK_KDMABB) +DECLARE_INSN(kdmabb16, MATCH_KDMABB16, MASK_KDMABB16) +DECLARE_INSN(kdmabt, MATCH_KDMABT, MASK_KDMABT) +DECLARE_INSN(kdmabt16, MATCH_KDMABT16, MASK_KDMABT16) +DECLARE_INSN(kdmatt, MATCH_KDMATT, MASK_KDMATT) +DECLARE_INSN(kdmatt16, MATCH_KDMATT16, MASK_KDMATT16) +DECLARE_INSN(kdmbb, MATCH_KDMBB, MASK_KDMBB) +DECLARE_INSN(kdmbb16, MATCH_KDMBB16, MASK_KDMBB16) +DECLARE_INSN(kdmbt, MATCH_KDMBT, MASK_KDMBT) +DECLARE_INSN(kdmbt16, MATCH_KDMBT16, MASK_KDMBT16) +DECLARE_INSN(kdmtt, MATCH_KDMTT, MASK_KDMTT) +DECLARE_INSN(kdmtt16, MATCH_KDMTT16, MASK_KDMTT16) +DECLARE_INSN(khm16, MATCH_KHM16, MASK_KHM16) +DECLARE_INSN(khm8, MATCH_KHM8, MASK_KHM8) +DECLARE_INSN(khmbb, MATCH_KHMBB, MASK_KHMBB) +DECLARE_INSN(khmbb16, MATCH_KHMBB16, MASK_KHMBB16) +DECLARE_INSN(khmbt, MATCH_KHMBT, MASK_KHMBT) +DECLARE_INSN(khmbt16, MATCH_KHMBT16, MASK_KHMBT16) +DECLARE_INSN(khmtt, MATCH_KHMTT, MASK_KHMTT) +DECLARE_INSN(khmtt16, MATCH_KHMTT16, MASK_KHMTT16) +DECLARE_INSN(khmx16, MATCH_KHMX16, MASK_KHMX16) +DECLARE_INSN(khmx8, MATCH_KHMX8, MASK_KHMX8) +DECLARE_INSN(kmabb, MATCH_KMABB, MASK_KMABB) +DECLARE_INSN(kmabb32, MATCH_KMABB32, MASK_KMABB32) +DECLARE_INSN(kmabt, MATCH_KMABT, MASK_KMABT) +DECLARE_INSN(kmabt32, MATCH_KMABT32, MASK_KMABT32) +DECLARE_INSN(kmada, MATCH_KMADA, MASK_KMADA) +DECLARE_INSN(kmadrs, MATCH_KMADRS, MASK_KMADRS) +DECLARE_INSN(kmadrs32, MATCH_KMADRS32, MASK_KMADRS32) +DECLARE_INSN(kmads, MATCH_KMADS, MASK_KMADS) +DECLARE_INSN(kmads32, MATCH_KMADS32, 
MASK_KMADS32) +DECLARE_INSN(kmar64, MATCH_KMAR64, MASK_KMAR64) +DECLARE_INSN(kmatt, MATCH_KMATT, MASK_KMATT) +DECLARE_INSN(kmatt32, MATCH_KMATT32, MASK_KMATT32) +DECLARE_INSN(kmaxda, MATCH_KMAXDA, MASK_KMAXDA) +DECLARE_INSN(kmaxda32, MATCH_KMAXDA32, MASK_KMAXDA32) +DECLARE_INSN(kmaxds, MATCH_KMAXDS, MASK_KMAXDS) +DECLARE_INSN(kmaxds32, MATCH_KMAXDS32, MASK_KMAXDS32) +DECLARE_INSN(kmda, MATCH_KMDA, MASK_KMDA) +DECLARE_INSN(kmda32, MATCH_KMDA32, MASK_KMDA32) +DECLARE_INSN(kmmac, MATCH_KMMAC, MASK_KMMAC) +DECLARE_INSN(kmmac_u, MATCH_KMMAC_U, MASK_KMMAC_U) +DECLARE_INSN(kmmawb, MATCH_KMMAWB, MASK_KMMAWB) +DECLARE_INSN(kmmawb2, MATCH_KMMAWB2, MASK_KMMAWB2) +DECLARE_INSN(kmmawb2_u, MATCH_KMMAWB2_U, MASK_KMMAWB2_U) +DECLARE_INSN(kmmawb_u, MATCH_KMMAWB_U, MASK_KMMAWB_U) +DECLARE_INSN(kmmawt, MATCH_KMMAWT, MASK_KMMAWT) +DECLARE_INSN(kmmawt2, MATCH_KMMAWT2, MASK_KMMAWT2) +DECLARE_INSN(kmmawt2_u, MATCH_KMMAWT2_U, MASK_KMMAWT2_U) +DECLARE_INSN(kmmawt_u, MATCH_KMMAWT_U, MASK_KMMAWT_U) +DECLARE_INSN(kmmsb, MATCH_KMMSB, MASK_KMMSB) +DECLARE_INSN(kmmsb_u, MATCH_KMMSB_U, MASK_KMMSB_U) +DECLARE_INSN(kmmwb2, MATCH_KMMWB2, MASK_KMMWB2) +DECLARE_INSN(kmmwb2_u, MATCH_KMMWB2_U, MASK_KMMWB2_U) +DECLARE_INSN(kmmwt2, MATCH_KMMWT2, MASK_KMMWT2) +DECLARE_INSN(kmmwt2_u, MATCH_KMMWT2_U, MASK_KMMWT2_U) +DECLARE_INSN(kmsda, MATCH_KMSDA, MASK_KMSDA) +DECLARE_INSN(kmsda32, MATCH_KMSDA32, MASK_KMSDA32) +DECLARE_INSN(kmsr64, MATCH_KMSR64, MASK_KMSR64) +DECLARE_INSN(kmsxda, MATCH_KMSXDA, MASK_KMSXDA) +DECLARE_INSN(kmsxda32, MATCH_KMSXDA32, MASK_KMSXDA32) +DECLARE_INSN(kmxda, MATCH_KMXDA, MASK_KMXDA) +DECLARE_INSN(kmxda32, MATCH_KMXDA32, MASK_KMXDA32) +DECLARE_INSN(ksll16, MATCH_KSLL16, MASK_KSLL16) +DECLARE_INSN(ksll32, MATCH_KSLL32, MASK_KSLL32) +DECLARE_INSN(ksll8, MATCH_KSLL8, MASK_KSLL8) +DECLARE_INSN(kslli16, MATCH_KSLLI16, MASK_KSLLI16) +DECLARE_INSN(kslli32, MATCH_KSLLI32, MASK_KSLLI32) +DECLARE_INSN(kslli8, MATCH_KSLLI8, MASK_KSLLI8) +DECLARE_INSN(kslliw, MATCH_KSLLIW, MASK_KSLLIW) +DECLARE_INSN(ksllw, MATCH_KSLLW, MASK_KSLLW) +DECLARE_INSN(kslra16, MATCH_KSLRA16, MASK_KSLRA16) +DECLARE_INSN(kslra16_u, MATCH_KSLRA16_U, MASK_KSLRA16_U) +DECLARE_INSN(kslra32, MATCH_KSLRA32, MASK_KSLRA32) +DECLARE_INSN(kslra32_u, MATCH_KSLRA32_U, MASK_KSLRA32_U) +DECLARE_INSN(kslra8, MATCH_KSLRA8, MASK_KSLRA8) +DECLARE_INSN(kslra8_u, MATCH_KSLRA8_U, MASK_KSLRA8_U) +DECLARE_INSN(kslraw, MATCH_KSLRAW, MASK_KSLRAW) +DECLARE_INSN(kslraw_u, MATCH_KSLRAW_U, MASK_KSLRAW_U) +DECLARE_INSN(kstas16, MATCH_KSTAS16, MASK_KSTAS16) +DECLARE_INSN(kstas32, MATCH_KSTAS32, MASK_KSTAS32) +DECLARE_INSN(kstsa16, MATCH_KSTSA16, MASK_KSTSA16) +DECLARE_INSN(kstsa32, MATCH_KSTSA32, MASK_KSTSA32) +DECLARE_INSN(ksub16, MATCH_KSUB16, MASK_KSUB16) +DECLARE_INSN(ksub32, MATCH_KSUB32, MASK_KSUB32) +DECLARE_INSN(ksub64, MATCH_KSUB64, MASK_KSUB64) +DECLARE_INSN(ksub8, MATCH_KSUB8, MASK_KSUB8) +DECLARE_INSN(ksubh, MATCH_KSUBH, MASK_KSUBH) +DECLARE_INSN(ksubw, MATCH_KSUBW, MASK_KSUBW) +DECLARE_INSN(kwmmul, MATCH_KWMMUL, MASK_KWMMUL) +DECLARE_INSN(kwmmul_u, MATCH_KWMMUL_U, MASK_KWMMUL_U) +DECLARE_INSN(lb, MATCH_LB, MASK_LB) +DECLARE_INSN(lbu, MATCH_LBU, MASK_LBU) +DECLARE_INSN(ld, MATCH_LD, MASK_LD) +DECLARE_INSN(lh, MATCH_LH, MASK_LH) +DECLARE_INSN(lhu, MATCH_LHU, MASK_LHU) +DECLARE_INSN(lr_d, MATCH_LR_D, MASK_LR_D) +DECLARE_INSN(lr_w, MATCH_LR_W, MASK_LR_W) +DECLARE_INSN(lui, MATCH_LUI, MASK_LUI) +DECLARE_INSN(lw, MATCH_LW, MASK_LW) +DECLARE_INSN(lwu, MATCH_LWU, MASK_LWU) +DECLARE_INSN(maddr32, MATCH_MADDR32, MASK_MADDR32) +DECLARE_INSN(max, MATCH_MAX, MASK_MAX) 
+DECLARE_INSN(maxu, MATCH_MAXU, MASK_MAXU) +DECLARE_INSN(min, MATCH_MIN, MASK_MIN) +DECLARE_INSN(minu, MATCH_MINU, MASK_MINU) +DECLARE_INSN(mnret, MATCH_MNRET, MASK_MNRET) +DECLARE_INSN(mret, MATCH_MRET, MASK_MRET) +DECLARE_INSN(msubr32, MATCH_MSUBR32, MASK_MSUBR32) +DECLARE_INSN(mul, MATCH_MUL, MASK_MUL) +DECLARE_INSN(mulh, MATCH_MULH, MASK_MULH) +DECLARE_INSN(mulhsu, MATCH_MULHSU, MASK_MULHSU) +DECLARE_INSN(mulhu, MATCH_MULHU, MASK_MULHU) +DECLARE_INSN(mulr64, MATCH_MULR64, MASK_MULR64) +DECLARE_INSN(mulsr64, MATCH_MULSR64, MASK_MULSR64) +DECLARE_INSN(mulw, MATCH_MULW, MASK_MULW) +DECLARE_INSN(or, MATCH_OR, MASK_OR) +DECLARE_INSN(ori, MATCH_ORI, MASK_ORI) +DECLARE_INSN(orn, MATCH_ORN, MASK_ORN) +DECLARE_INSN(pack, MATCH_PACK, MASK_PACK) +DECLARE_INSN(packh, MATCH_PACKH, MASK_PACKH) +DECLARE_INSN(packu, MATCH_PACKU, MASK_PACKU) +DECLARE_INSN(packuw, MATCH_PACKUW, MASK_PACKUW) +DECLARE_INSN(packw, MATCH_PACKW, MASK_PACKW) +DECLARE_INSN(pause, MATCH_PAUSE, MASK_PAUSE) +DECLARE_INSN(pbsad, MATCH_PBSAD, MASK_PBSAD) +DECLARE_INSN(pbsada, MATCH_PBSADA, MASK_PBSADA) +DECLARE_INSN(pkbb16, MATCH_PKBB16, MASK_PKBB16) +DECLARE_INSN(pkbt16, MATCH_PKBT16, MASK_PKBT16) +DECLARE_INSN(pkbt32, MATCH_PKBT32, MASK_PKBT32) +DECLARE_INSN(pktb16, MATCH_PKTB16, MASK_PKTB16) +DECLARE_INSN(pktb32, MATCH_PKTB32, MASK_PKTB32) +DECLARE_INSN(pktt16, MATCH_PKTT16, MASK_PKTT16) +DECLARE_INSN(prefetch_i, MATCH_PREFETCH_I, MASK_PREFETCH_I) +DECLARE_INSN(prefetch_r, MATCH_PREFETCH_R, MASK_PREFETCH_R) +DECLARE_INSN(prefetch_w, MATCH_PREFETCH_W, MASK_PREFETCH_W) +DECLARE_INSN(radd16, MATCH_RADD16, MASK_RADD16) +DECLARE_INSN(radd32, MATCH_RADD32, MASK_RADD32) +DECLARE_INSN(radd64, MATCH_RADD64, MASK_RADD64) +DECLARE_INSN(radd8, MATCH_RADD8, MASK_RADD8) +DECLARE_INSN(raddw, MATCH_RADDW, MASK_RADDW) +DECLARE_INSN(rcras16, MATCH_RCRAS16, MASK_RCRAS16) +DECLARE_INSN(rcras32, MATCH_RCRAS32, MASK_RCRAS32) +DECLARE_INSN(rcrsa16, MATCH_RCRSA16, MASK_RCRSA16) +DECLARE_INSN(rcrsa32, MATCH_RCRSA32, MASK_RCRSA32) +DECLARE_INSN(rem, MATCH_REM, MASK_REM) +DECLARE_INSN(remu, MATCH_REMU, MASK_REMU) +DECLARE_INSN(remuw, MATCH_REMUW, MASK_REMUW) +DECLARE_INSN(remw, MATCH_REMW, MASK_REMW) +DECLARE_INSN(rol, MATCH_ROL, MASK_ROL) +DECLARE_INSN(rolw, MATCH_ROLW, MASK_ROLW) +DECLARE_INSN(ror, MATCH_ROR, MASK_ROR) +DECLARE_INSN(rori, MATCH_RORI, MASK_RORI) +DECLARE_INSN(roriw, MATCH_RORIW, MASK_RORIW) +DECLARE_INSN(rorw, MATCH_RORW, MASK_RORW) +DECLARE_INSN(rstas16, MATCH_RSTAS16, MASK_RSTAS16) +DECLARE_INSN(rstas32, MATCH_RSTAS32, MASK_RSTAS32) +DECLARE_INSN(rstsa16, MATCH_RSTSA16, MASK_RSTSA16) +DECLARE_INSN(rstsa32, MATCH_RSTSA32, MASK_RSTSA32) +DECLARE_INSN(rsub16, MATCH_RSUB16, MASK_RSUB16) +DECLARE_INSN(rsub32, MATCH_RSUB32, MASK_RSUB32) +DECLARE_INSN(rsub64, MATCH_RSUB64, MASK_RSUB64) +DECLARE_INSN(rsub8, MATCH_RSUB8, MASK_RSUB8) +DECLARE_INSN(rsubw, MATCH_RSUBW, MASK_RSUBW) +DECLARE_INSN(sb, MATCH_SB, MASK_SB) +DECLARE_INSN(sc_d, MATCH_SC_D, MASK_SC_D) +DECLARE_INSN(sc_w, MATCH_SC_W, MASK_SC_W) +DECLARE_INSN(sclip16, MATCH_SCLIP16, MASK_SCLIP16) +DECLARE_INSN(sclip32, MATCH_SCLIP32, MASK_SCLIP32) +DECLARE_INSN(sclip8, MATCH_SCLIP8, MASK_SCLIP8) +DECLARE_INSN(scmple16, MATCH_SCMPLE16, MASK_SCMPLE16) +DECLARE_INSN(scmple8, MATCH_SCMPLE8, MASK_SCMPLE8) +DECLARE_INSN(scmplt16, MATCH_SCMPLT16, MASK_SCMPLT16) +DECLARE_INSN(scmplt8, MATCH_SCMPLT8, MASK_SCMPLT8) +DECLARE_INSN(sd, MATCH_SD, MASK_SD) +DECLARE_INSN(sext_b, MATCH_SEXT_B, MASK_SEXT_B) +DECLARE_INSN(sext_h, MATCH_SEXT_H, MASK_SEXT_H) +DECLARE_INSN(sfence_inval_ir, MATCH_SFENCE_INVAL_IR, 
MASK_SFENCE_INVAL_IR) +DECLARE_INSN(sfence_vma, MATCH_SFENCE_VMA, MASK_SFENCE_VMA) +DECLARE_INSN(sfence_w_inval, MATCH_SFENCE_W_INVAL, MASK_SFENCE_W_INVAL) +DECLARE_INSN(sh, MATCH_SH, MASK_SH) +DECLARE_INSN(sh1add, MATCH_SH1ADD, MASK_SH1ADD) +DECLARE_INSN(sh1add_uw, MATCH_SH1ADD_UW, MASK_SH1ADD_UW) +DECLARE_INSN(sh2add, MATCH_SH2ADD, MASK_SH2ADD) +DECLARE_INSN(sh2add_uw, MATCH_SH2ADD_UW, MASK_SH2ADD_UW) +DECLARE_INSN(sh3add, MATCH_SH3ADD, MASK_SH3ADD) +DECLARE_INSN(sh3add_uw, MATCH_SH3ADD_UW, MASK_SH3ADD_UW) +DECLARE_INSN(sha256sig0, MATCH_SHA256SIG0, MASK_SHA256SIG0) +DECLARE_INSN(sha256sig1, MATCH_SHA256SIG1, MASK_SHA256SIG1) +DECLARE_INSN(sha256sum0, MATCH_SHA256SUM0, MASK_SHA256SUM0) +DECLARE_INSN(sha256sum1, MATCH_SHA256SUM1, MASK_SHA256SUM1) +DECLARE_INSN(sha512sig0, MATCH_SHA512SIG0, MASK_SHA512SIG0) +DECLARE_INSN(sha512sig0h, MATCH_SHA512SIG0H, MASK_SHA512SIG0H) +DECLARE_INSN(sha512sig0l, MATCH_SHA512SIG0L, MASK_SHA512SIG0L) +DECLARE_INSN(sha512sig1, MATCH_SHA512SIG1, MASK_SHA512SIG1) +DECLARE_INSN(sha512sig1h, MATCH_SHA512SIG1H, MASK_SHA512SIG1H) +DECLARE_INSN(sha512sig1l, MATCH_SHA512SIG1L, MASK_SHA512SIG1L) +DECLARE_INSN(sha512sum0, MATCH_SHA512SUM0, MASK_SHA512SUM0) +DECLARE_INSN(sha512sum0r, MATCH_SHA512SUM0R, MASK_SHA512SUM0R) +DECLARE_INSN(sha512sum1, MATCH_SHA512SUM1, MASK_SHA512SUM1) +DECLARE_INSN(sha512sum1r, MATCH_SHA512SUM1R, MASK_SHA512SUM1R) +DECLARE_INSN(shfl, MATCH_SHFL, MASK_SHFL) +DECLARE_INSN(shfli, MATCH_SHFLI, MASK_SHFLI) +DECLARE_INSN(shflw, MATCH_SHFLW, MASK_SHFLW) +DECLARE_INSN(sinval_vma, MATCH_SINVAL_VMA, MASK_SINVAL_VMA) +DECLARE_INSN(sll, MATCH_SLL, MASK_SLL) +DECLARE_INSN(sll16, MATCH_SLL16, MASK_SLL16) +DECLARE_INSN(sll32, MATCH_SLL32, MASK_SLL32) +DECLARE_INSN(sll8, MATCH_SLL8, MASK_SLL8) +DECLARE_INSN(slli, MATCH_SLLI, MASK_SLLI) +DECLARE_INSN(slli16, MATCH_SLLI16, MASK_SLLI16) +DECLARE_INSN(slli32, MATCH_SLLI32, MASK_SLLI32) +DECLARE_INSN(slli8, MATCH_SLLI8, MASK_SLLI8) +DECLARE_INSN(slli_rv32, MATCH_SLLI_RV32, MASK_SLLI_RV32) +DECLARE_INSN(slli_uw, MATCH_SLLI_UW, MASK_SLLI_UW) +DECLARE_INSN(slliw, MATCH_SLLIW, MASK_SLLIW) +DECLARE_INSN(sllw, MATCH_SLLW, MASK_SLLW) +DECLARE_INSN(slo, MATCH_SLO, MASK_SLO) +DECLARE_INSN(sloi, MATCH_SLOI, MASK_SLOI) +DECLARE_INSN(sloiw, MATCH_SLOIW, MASK_SLOIW) +DECLARE_INSN(slow, MATCH_SLOW, MASK_SLOW) +DECLARE_INSN(slt, MATCH_SLT, MASK_SLT) +DECLARE_INSN(slti, MATCH_SLTI, MASK_SLTI) +DECLARE_INSN(sltiu, MATCH_SLTIU, MASK_SLTIU) +DECLARE_INSN(sltu, MATCH_SLTU, MASK_SLTU) +DECLARE_INSN(sm3p0, MATCH_SM3P0, MASK_SM3P0) +DECLARE_INSN(sm3p1, MATCH_SM3P1, MASK_SM3P1) +DECLARE_INSN(sm4ed, MATCH_SM4ED, MASK_SM4ED) +DECLARE_INSN(sm4ks, MATCH_SM4KS, MASK_SM4KS) +DECLARE_INSN(smal, MATCH_SMAL, MASK_SMAL) +DECLARE_INSN(smalbb, MATCH_SMALBB, MASK_SMALBB) +DECLARE_INSN(smalbt, MATCH_SMALBT, MASK_SMALBT) +DECLARE_INSN(smalda, MATCH_SMALDA, MASK_SMALDA) +DECLARE_INSN(smaldrs, MATCH_SMALDRS, MASK_SMALDRS) +DECLARE_INSN(smalds, MATCH_SMALDS, MASK_SMALDS) +DECLARE_INSN(smaltt, MATCH_SMALTT, MASK_SMALTT) +DECLARE_INSN(smalxda, MATCH_SMALXDA, MASK_SMALXDA) +DECLARE_INSN(smalxds, MATCH_SMALXDS, MASK_SMALXDS) +DECLARE_INSN(smaqa, MATCH_SMAQA, MASK_SMAQA) +DECLARE_INSN(smaqa_su, MATCH_SMAQA_SU, MASK_SMAQA_SU) +DECLARE_INSN(smar64, MATCH_SMAR64, MASK_SMAR64) +DECLARE_INSN(smax16, MATCH_SMAX16, MASK_SMAX16) +DECLARE_INSN(smax32, MATCH_SMAX32, MASK_SMAX32) +DECLARE_INSN(smax8, MATCH_SMAX8, MASK_SMAX8) +DECLARE_INSN(smbb16, MATCH_SMBB16, MASK_SMBB16) +DECLARE_INSN(smbt16, MATCH_SMBT16, MASK_SMBT16) +DECLARE_INSN(smbt32, MATCH_SMBT32, 
MASK_SMBT32) +DECLARE_INSN(smdrs, MATCH_SMDRS, MASK_SMDRS) +DECLARE_INSN(smdrs32, MATCH_SMDRS32, MASK_SMDRS32) +DECLARE_INSN(smds, MATCH_SMDS, MASK_SMDS) +DECLARE_INSN(smds32, MATCH_SMDS32, MASK_SMDS32) +DECLARE_INSN(smin16, MATCH_SMIN16, MASK_SMIN16) +DECLARE_INSN(smin32, MATCH_SMIN32, MASK_SMIN32) +DECLARE_INSN(smin8, MATCH_SMIN8, MASK_SMIN8) +DECLARE_INSN(smmul, MATCH_SMMUL, MASK_SMMUL) +DECLARE_INSN(smmul_u, MATCH_SMMUL_U, MASK_SMMUL_U) +DECLARE_INSN(smmwb, MATCH_SMMWB, MASK_SMMWB) +DECLARE_INSN(smmwb_u, MATCH_SMMWB_U, MASK_SMMWB_U) +DECLARE_INSN(smmwt, MATCH_SMMWT, MASK_SMMWT) +DECLARE_INSN(smmwt_u, MATCH_SMMWT_U, MASK_SMMWT_U) +DECLARE_INSN(smslda, MATCH_SMSLDA, MASK_SMSLDA) +DECLARE_INSN(smslxda, MATCH_SMSLXDA, MASK_SMSLXDA) +DECLARE_INSN(smsr64, MATCH_SMSR64, MASK_SMSR64) +DECLARE_INSN(smtt16, MATCH_SMTT16, MASK_SMTT16) +DECLARE_INSN(smtt32, MATCH_SMTT32, MASK_SMTT32) +DECLARE_INSN(smul16, MATCH_SMUL16, MASK_SMUL16) +DECLARE_INSN(smul8, MATCH_SMUL8, MASK_SMUL8) +DECLARE_INSN(smulx16, MATCH_SMULX16, MASK_SMULX16) +DECLARE_INSN(smulx8, MATCH_SMULX8, MASK_SMULX8) +DECLARE_INSN(smxds, MATCH_SMXDS, MASK_SMXDS) +DECLARE_INSN(smxds32, MATCH_SMXDS32, MASK_SMXDS32) +DECLARE_INSN(sra, MATCH_SRA, MASK_SRA) +DECLARE_INSN(sra16, MATCH_SRA16, MASK_SRA16) +DECLARE_INSN(sra16_u, MATCH_SRA16_U, MASK_SRA16_U) +DECLARE_INSN(sra32, MATCH_SRA32, MASK_SRA32) +DECLARE_INSN(sra32_u, MATCH_SRA32_U, MASK_SRA32_U) +DECLARE_INSN(sra8, MATCH_SRA8, MASK_SRA8) +DECLARE_INSN(sra8_u, MATCH_SRA8_U, MASK_SRA8_U) +DECLARE_INSN(sra_u, MATCH_SRA_U, MASK_SRA_U) +DECLARE_INSN(srai, MATCH_SRAI, MASK_SRAI) +DECLARE_INSN(srai16, MATCH_SRAI16, MASK_SRAI16) +DECLARE_INSN(srai16_u, MATCH_SRAI16_U, MASK_SRAI16_U) +DECLARE_INSN(srai32, MATCH_SRAI32, MASK_SRAI32) +DECLARE_INSN(srai32_u, MATCH_SRAI32_U, MASK_SRAI32_U) +DECLARE_INSN(srai8, MATCH_SRAI8, MASK_SRAI8) +DECLARE_INSN(srai8_u, MATCH_SRAI8_U, MASK_SRAI8_U) +DECLARE_INSN(srai_rv32, MATCH_SRAI_RV32, MASK_SRAI_RV32) +DECLARE_INSN(srai_u, MATCH_SRAI_U, MASK_SRAI_U) +DECLARE_INSN(sraiw, MATCH_SRAIW, MASK_SRAIW) +DECLARE_INSN(sraiw_u, MATCH_SRAIW_U, MASK_SRAIW_U) +DECLARE_INSN(sraw, MATCH_SRAW, MASK_SRAW) +DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) +DECLARE_INSN(srl, MATCH_SRL, MASK_SRL) +DECLARE_INSN(srl16, MATCH_SRL16, MASK_SRL16) +DECLARE_INSN(srl16_u, MATCH_SRL16_U, MASK_SRL16_U) +DECLARE_INSN(srl32, MATCH_SRL32, MASK_SRL32) +DECLARE_INSN(srl32_u, MATCH_SRL32_U, MASK_SRL32_U) +DECLARE_INSN(srl8, MATCH_SRL8, MASK_SRL8) +DECLARE_INSN(srl8_u, MATCH_SRL8_U, MASK_SRL8_U) +DECLARE_INSN(srli, MATCH_SRLI, MASK_SRLI) +DECLARE_INSN(srli16, MATCH_SRLI16, MASK_SRLI16) +DECLARE_INSN(srli16_u, MATCH_SRLI16_U, MASK_SRLI16_U) +DECLARE_INSN(srli32, MATCH_SRLI32, MASK_SRLI32) +DECLARE_INSN(srli32_u, MATCH_SRLI32_U, MASK_SRLI32_U) +DECLARE_INSN(srli8, MATCH_SRLI8, MASK_SRLI8) +DECLARE_INSN(srli8_u, MATCH_SRLI8_U, MASK_SRLI8_U) +DECLARE_INSN(srli_rv32, MATCH_SRLI_RV32, MASK_SRLI_RV32) +DECLARE_INSN(srliw, MATCH_SRLIW, MASK_SRLIW) +DECLARE_INSN(srlw, MATCH_SRLW, MASK_SRLW) +DECLARE_INSN(sro, MATCH_SRO, MASK_SRO) +DECLARE_INSN(sroi, MATCH_SROI, MASK_SROI) +DECLARE_INSN(sroiw, MATCH_SROIW, MASK_SROIW) +DECLARE_INSN(srow, MATCH_SROW, MASK_SROW) +DECLARE_INSN(stas16, MATCH_STAS16, MASK_STAS16) +DECLARE_INSN(stas32, MATCH_STAS32, MASK_STAS32) +DECLARE_INSN(stsa16, MATCH_STSA16, MASK_STSA16) +DECLARE_INSN(stsa32, MATCH_STSA32, MASK_STSA32) +DECLARE_INSN(sub, MATCH_SUB, MASK_SUB) +DECLARE_INSN(sub16, MATCH_SUB16, MASK_SUB16) +DECLARE_INSN(sub32, MATCH_SUB32, MASK_SUB32) +DECLARE_INSN(sub64, MATCH_SUB64, 
MASK_SUB64) +DECLARE_INSN(sub8, MATCH_SUB8, MASK_SUB8) +DECLARE_INSN(subw, MATCH_SUBW, MASK_SUBW) +DECLARE_INSN(sunpkd810, MATCH_SUNPKD810, MASK_SUNPKD810) +DECLARE_INSN(sunpkd820, MATCH_SUNPKD820, MASK_SUNPKD820) +DECLARE_INSN(sunpkd830, MATCH_SUNPKD830, MASK_SUNPKD830) +DECLARE_INSN(sunpkd831, MATCH_SUNPKD831, MASK_SUNPKD831) +DECLARE_INSN(sunpkd832, MATCH_SUNPKD832, MASK_SUNPKD832) +DECLARE_INSN(sw, MATCH_SW, MASK_SW) +DECLARE_INSN(uclip16, MATCH_UCLIP16, MASK_UCLIP16) +DECLARE_INSN(uclip32, MATCH_UCLIP32, MASK_UCLIP32) +DECLARE_INSN(uclip8, MATCH_UCLIP8, MASK_UCLIP8) +DECLARE_INSN(ucmple16, MATCH_UCMPLE16, MASK_UCMPLE16) +DECLARE_INSN(ucmple8, MATCH_UCMPLE8, MASK_UCMPLE8) +DECLARE_INSN(ucmplt16, MATCH_UCMPLT16, MASK_UCMPLT16) +DECLARE_INSN(ucmplt8, MATCH_UCMPLT8, MASK_UCMPLT8) +DECLARE_INSN(ukadd16, MATCH_UKADD16, MASK_UKADD16) +DECLARE_INSN(ukadd32, MATCH_UKADD32, MASK_UKADD32) +DECLARE_INSN(ukadd64, MATCH_UKADD64, MASK_UKADD64) +DECLARE_INSN(ukadd8, MATCH_UKADD8, MASK_UKADD8) +DECLARE_INSN(ukaddh, MATCH_UKADDH, MASK_UKADDH) +DECLARE_INSN(ukaddw, MATCH_UKADDW, MASK_UKADDW) +DECLARE_INSN(ukcras16, MATCH_UKCRAS16, MASK_UKCRAS16) +DECLARE_INSN(ukcras32, MATCH_UKCRAS32, MASK_UKCRAS32) +DECLARE_INSN(ukcrsa16, MATCH_UKCRSA16, MASK_UKCRSA16) +DECLARE_INSN(ukcrsa32, MATCH_UKCRSA32, MASK_UKCRSA32) +DECLARE_INSN(ukmar64, MATCH_UKMAR64, MASK_UKMAR64) +DECLARE_INSN(ukmsr64, MATCH_UKMSR64, MASK_UKMSR64) +DECLARE_INSN(ukstas16, MATCH_UKSTAS16, MASK_UKSTAS16) +DECLARE_INSN(ukstas32, MATCH_UKSTAS32, MASK_UKSTAS32) +DECLARE_INSN(ukstsa16, MATCH_UKSTSA16, MASK_UKSTSA16) +DECLARE_INSN(ukstsa32, MATCH_UKSTSA32, MASK_UKSTSA32) +DECLARE_INSN(uksub16, MATCH_UKSUB16, MASK_UKSUB16) +DECLARE_INSN(uksub32, MATCH_UKSUB32, MASK_UKSUB32) +DECLARE_INSN(uksub64, MATCH_UKSUB64, MASK_UKSUB64) +DECLARE_INSN(uksub8, MATCH_UKSUB8, MASK_UKSUB8) +DECLARE_INSN(uksubh, MATCH_UKSUBH, MASK_UKSUBH) +DECLARE_INSN(uksubw, MATCH_UKSUBW, MASK_UKSUBW) +DECLARE_INSN(umaqa, MATCH_UMAQA, MASK_UMAQA) +DECLARE_INSN(umar64, MATCH_UMAR64, MASK_UMAR64) +DECLARE_INSN(umax16, MATCH_UMAX16, MASK_UMAX16) +DECLARE_INSN(umax32, MATCH_UMAX32, MASK_UMAX32) +DECLARE_INSN(umax8, MATCH_UMAX8, MASK_UMAX8) +DECLARE_INSN(umin16, MATCH_UMIN16, MASK_UMIN16) +DECLARE_INSN(umin32, MATCH_UMIN32, MASK_UMIN32) +DECLARE_INSN(umin8, MATCH_UMIN8, MASK_UMIN8) +DECLARE_INSN(umsr64, MATCH_UMSR64, MASK_UMSR64) +DECLARE_INSN(umul16, MATCH_UMUL16, MASK_UMUL16) +DECLARE_INSN(umul8, MATCH_UMUL8, MASK_UMUL8) +DECLARE_INSN(umulx16, MATCH_UMULX16, MASK_UMULX16) +DECLARE_INSN(umulx8, MATCH_UMULX8, MASK_UMULX8) +DECLARE_INSN(unshfl, MATCH_UNSHFL, MASK_UNSHFL) +DECLARE_INSN(unshfli, MATCH_UNSHFLI, MASK_UNSHFLI) +DECLARE_INSN(unshflw, MATCH_UNSHFLW, MASK_UNSHFLW) +DECLARE_INSN(uradd16, MATCH_URADD16, MASK_URADD16) +DECLARE_INSN(uradd32, MATCH_URADD32, MASK_URADD32) +DECLARE_INSN(uradd64, MATCH_URADD64, MASK_URADD64) +DECLARE_INSN(uradd8, MATCH_URADD8, MASK_URADD8) +DECLARE_INSN(uraddw, MATCH_URADDW, MASK_URADDW) +DECLARE_INSN(urcras16, MATCH_URCRAS16, MASK_URCRAS16) +DECLARE_INSN(urcras32, MATCH_URCRAS32, MASK_URCRAS32) +DECLARE_INSN(urcrsa16, MATCH_URCRSA16, MASK_URCRSA16) +DECLARE_INSN(urcrsa32, MATCH_URCRSA32, MASK_URCRSA32) +DECLARE_INSN(urstas16, MATCH_URSTAS16, MASK_URSTAS16) +DECLARE_INSN(urstas32, MATCH_URSTAS32, MASK_URSTAS32) +DECLARE_INSN(urstsa16, MATCH_URSTSA16, MASK_URSTSA16) +DECLARE_INSN(urstsa32, MATCH_URSTSA32, MASK_URSTSA32) +DECLARE_INSN(ursub16, MATCH_URSUB16, MASK_URSUB16) +DECLARE_INSN(ursub32, MATCH_URSUB32, MASK_URSUB32) +DECLARE_INSN(ursub64, 
MATCH_URSUB64, MASK_URSUB64) +DECLARE_INSN(ursub8, MATCH_URSUB8, MASK_URSUB8) +DECLARE_INSN(ursubw, MATCH_URSUBW, MASK_URSUBW) +DECLARE_INSN(vaadd_vv, MATCH_VAADD_VV, MASK_VAADD_VV) +DECLARE_INSN(vaadd_vx, MATCH_VAADD_VX, MASK_VAADD_VX) +DECLARE_INSN(vaaddu_vv, MATCH_VAADDU_VV, MASK_VAADDU_VV) +DECLARE_INSN(vaaddu_vx, MATCH_VAADDU_VX, MASK_VAADDU_VX) +DECLARE_INSN(vadc_vim, MATCH_VADC_VIM, MASK_VADC_VIM) +DECLARE_INSN(vadc_vvm, MATCH_VADC_VVM, MASK_VADC_VVM) +DECLARE_INSN(vadc_vxm, MATCH_VADC_VXM, MASK_VADC_VXM) +DECLARE_INSN(vadd_vi, MATCH_VADD_VI, MASK_VADD_VI) +DECLARE_INSN(vadd_vv, MATCH_VADD_VV, MASK_VADD_VV) +DECLARE_INSN(vadd_vx, MATCH_VADD_VX, MASK_VADD_VX) +DECLARE_INSN(vamoaddei16_v, MATCH_VAMOADDEI16_V, MASK_VAMOADDEI16_V) +DECLARE_INSN(vamoaddei32_v, MATCH_VAMOADDEI32_V, MASK_VAMOADDEI32_V) +DECLARE_INSN(vamoaddei64_v, MATCH_VAMOADDEI64_V, MASK_VAMOADDEI64_V) +DECLARE_INSN(vamoaddei8_v, MATCH_VAMOADDEI8_V, MASK_VAMOADDEI8_V) +DECLARE_INSN(vamoandei16_v, MATCH_VAMOANDEI16_V, MASK_VAMOANDEI16_V) +DECLARE_INSN(vamoandei32_v, MATCH_VAMOANDEI32_V, MASK_VAMOANDEI32_V) +DECLARE_INSN(vamoandei64_v, MATCH_VAMOANDEI64_V, MASK_VAMOANDEI64_V) +DECLARE_INSN(vamoandei8_v, MATCH_VAMOANDEI8_V, MASK_VAMOANDEI8_V) +DECLARE_INSN(vamomaxei16_v, MATCH_VAMOMAXEI16_V, MASK_VAMOMAXEI16_V) +DECLARE_INSN(vamomaxei32_v, MATCH_VAMOMAXEI32_V, MASK_VAMOMAXEI32_V) +DECLARE_INSN(vamomaxei64_v, MATCH_VAMOMAXEI64_V, MASK_VAMOMAXEI64_V) +DECLARE_INSN(vamomaxei8_v, MATCH_VAMOMAXEI8_V, MASK_VAMOMAXEI8_V) +DECLARE_INSN(vamomaxuei16_v, MATCH_VAMOMAXUEI16_V, MASK_VAMOMAXUEI16_V) +DECLARE_INSN(vamomaxuei32_v, MATCH_VAMOMAXUEI32_V, MASK_VAMOMAXUEI32_V) +DECLARE_INSN(vamomaxuei64_v, MATCH_VAMOMAXUEI64_V, MASK_VAMOMAXUEI64_V) +DECLARE_INSN(vamomaxuei8_v, MATCH_VAMOMAXUEI8_V, MASK_VAMOMAXUEI8_V) +DECLARE_INSN(vamominei16_v, MATCH_VAMOMINEI16_V, MASK_VAMOMINEI16_V) +DECLARE_INSN(vamominei32_v, MATCH_VAMOMINEI32_V, MASK_VAMOMINEI32_V) +DECLARE_INSN(vamominei64_v, MATCH_VAMOMINEI64_V, MASK_VAMOMINEI64_V) +DECLARE_INSN(vamominei8_v, MATCH_VAMOMINEI8_V, MASK_VAMOMINEI8_V) +DECLARE_INSN(vamominuei16_v, MATCH_VAMOMINUEI16_V, MASK_VAMOMINUEI16_V) +DECLARE_INSN(vamominuei32_v, MATCH_VAMOMINUEI32_V, MASK_VAMOMINUEI32_V) +DECLARE_INSN(vamominuei64_v, MATCH_VAMOMINUEI64_V, MASK_VAMOMINUEI64_V) +DECLARE_INSN(vamominuei8_v, MATCH_VAMOMINUEI8_V, MASK_VAMOMINUEI8_V) +DECLARE_INSN(vamoorei16_v, MATCH_VAMOOREI16_V, MASK_VAMOOREI16_V) +DECLARE_INSN(vamoorei32_v, MATCH_VAMOOREI32_V, MASK_VAMOOREI32_V) +DECLARE_INSN(vamoorei64_v, MATCH_VAMOOREI64_V, MASK_VAMOOREI64_V) +DECLARE_INSN(vamoorei8_v, MATCH_VAMOOREI8_V, MASK_VAMOOREI8_V) +DECLARE_INSN(vamoswapei16_v, MATCH_VAMOSWAPEI16_V, MASK_VAMOSWAPEI16_V) +DECLARE_INSN(vamoswapei32_v, MATCH_VAMOSWAPEI32_V, MASK_VAMOSWAPEI32_V) +DECLARE_INSN(vamoswapei64_v, MATCH_VAMOSWAPEI64_V, MASK_VAMOSWAPEI64_V) +DECLARE_INSN(vamoswapei8_v, MATCH_VAMOSWAPEI8_V, MASK_VAMOSWAPEI8_V) +DECLARE_INSN(vamoxorei16_v, MATCH_VAMOXOREI16_V, MASK_VAMOXOREI16_V) +DECLARE_INSN(vamoxorei32_v, MATCH_VAMOXOREI32_V, MASK_VAMOXOREI32_V) +DECLARE_INSN(vamoxorei64_v, MATCH_VAMOXOREI64_V, MASK_VAMOXOREI64_V) +DECLARE_INSN(vamoxorei8_v, MATCH_VAMOXOREI8_V, MASK_VAMOXOREI8_V) +DECLARE_INSN(vand_vi, MATCH_VAND_VI, MASK_VAND_VI) +DECLARE_INSN(vand_vv, MATCH_VAND_VV, MASK_VAND_VV) +DECLARE_INSN(vand_vx, MATCH_VAND_VX, MASK_VAND_VX) +DECLARE_INSN(vasub_vv, MATCH_VASUB_VV, MASK_VASUB_VV) +DECLARE_INSN(vasub_vx, MATCH_VASUB_VX, MASK_VASUB_VX) +DECLARE_INSN(vasubu_vv, MATCH_VASUBU_VV, MASK_VASUBU_VV) +DECLARE_INSN(vasubu_vx, 
MATCH_VASUBU_VX, MASK_VASUBU_VX) +DECLARE_INSN(vcompress_vm, MATCH_VCOMPRESS_VM, MASK_VCOMPRESS_VM) +DECLARE_INSN(vcpop_m, MATCH_VCPOP_M, MASK_VCPOP_M) +DECLARE_INSN(vdiv_vv, MATCH_VDIV_VV, MASK_VDIV_VV) +DECLARE_INSN(vdiv_vx, MATCH_VDIV_VX, MASK_VDIV_VX) +DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV) +DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) +DECLARE_INSN(vfadd_vf, MATCH_VFADD_VF, MASK_VFADD_VF) +DECLARE_INSN(vfadd_vv, MATCH_VFADD_VV, MASK_VFADD_VV) +DECLARE_INSN(vfclass_v, MATCH_VFCLASS_V, MASK_VFCLASS_V) +DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) +DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) +DECLARE_INSN(vfcvt_rtz_x_f_v, MATCH_VFCVT_RTZ_X_F_V, MASK_VFCVT_RTZ_X_F_V) +DECLARE_INSN(vfcvt_rtz_xu_f_v, MATCH_VFCVT_RTZ_XU_F_V, MASK_VFCVT_RTZ_XU_F_V) +DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V) +DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V) +DECLARE_INSN(vfdiv_vf, MATCH_VFDIV_VF, MASK_VFDIV_VF) +DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV) +DECLARE_INSN(vfirst_m, MATCH_VFIRST_M, MASK_VFIRST_M) +DECLARE_INSN(vfmacc_vf, MATCH_VFMACC_VF, MASK_VFMACC_VF) +DECLARE_INSN(vfmacc_vv, MATCH_VFMACC_VV, MASK_VFMACC_VV) +DECLARE_INSN(vfmadd_vf, MATCH_VFMADD_VF, MASK_VFMADD_VF) +DECLARE_INSN(vfmadd_vv, MATCH_VFMADD_VV, MASK_VFMADD_VV) +DECLARE_INSN(vfmax_vf, MATCH_VFMAX_VF, MASK_VFMAX_VF) +DECLARE_INSN(vfmax_vv, MATCH_VFMAX_VV, MASK_VFMAX_VV) +DECLARE_INSN(vfmerge_vfm, MATCH_VFMERGE_VFM, MASK_VFMERGE_VFM) +DECLARE_INSN(vfmin_vf, MATCH_VFMIN_VF, MASK_VFMIN_VF) +DECLARE_INSN(vfmin_vv, MATCH_VFMIN_VV, MASK_VFMIN_VV) +DECLARE_INSN(vfmsac_vf, MATCH_VFMSAC_VF, MASK_VFMSAC_VF) +DECLARE_INSN(vfmsac_vv, MATCH_VFMSAC_VV, MASK_VFMSAC_VV) +DECLARE_INSN(vfmsub_vf, MATCH_VFMSUB_VF, MASK_VFMSUB_VF) +DECLARE_INSN(vfmsub_vv, MATCH_VFMSUB_VV, MASK_VFMSUB_VV) +DECLARE_INSN(vfmul_vf, MATCH_VFMUL_VF, MASK_VFMUL_VF) +DECLARE_INSN(vfmul_vv, MATCH_VFMUL_VV, MASK_VFMUL_VV) +DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S) +DECLARE_INSN(vfmv_s_f, MATCH_VFMV_S_F, MASK_VFMV_S_F) +DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F) +DECLARE_INSN(vfncvt_f_f_w, MATCH_VFNCVT_F_F_W, MASK_VFNCVT_F_F_W) +DECLARE_INSN(vfncvt_f_x_w, MATCH_VFNCVT_F_X_W, MASK_VFNCVT_F_X_W) +DECLARE_INSN(vfncvt_f_xu_w, MATCH_VFNCVT_F_XU_W, MASK_VFNCVT_F_XU_W) +DECLARE_INSN(vfncvt_rod_f_f_w, MATCH_VFNCVT_ROD_F_F_W, MASK_VFNCVT_ROD_F_F_W) +DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) +DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) +DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) +DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W) +DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) +DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) +DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) +DECLARE_INSN(vfnmadd_vv, MATCH_VFNMADD_VV, MASK_VFNMADD_VV) +DECLARE_INSN(vfnmsac_vf, MATCH_VFNMSAC_VF, MASK_VFNMSAC_VF) +DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV) +DECLARE_INSN(vfnmsub_vf, MATCH_VFNMSUB_VF, MASK_VFNMSUB_VF) +DECLARE_INSN(vfnmsub_vv, MATCH_VFNMSUB_VV, MASK_VFNMSUB_VV) +DECLARE_INSN(vfrdiv_vf, MATCH_VFRDIV_VF, MASK_VFRDIV_VF) +DECLARE_INSN(vfrec7_v, MATCH_VFREC7_V, MASK_VFREC7_V) +DECLARE_INSN(vfredmax_vs, MATCH_VFREDMAX_VS, MASK_VFREDMAX_VS) +DECLARE_INSN(vfredmin_vs, MATCH_VFREDMIN_VS, MASK_VFREDMIN_VS) +DECLARE_INSN(vfredosum_vs, MATCH_VFREDOSUM_VS, MASK_VFREDOSUM_VS) +DECLARE_INSN(vfredusum_vs, 
MATCH_VFREDUSUM_VS, MASK_VFREDUSUM_VS) +DECLARE_INSN(vfrsqrt7_v, MATCH_VFRSQRT7_V, MASK_VFRSQRT7_V) +DECLARE_INSN(vfrsub_vf, MATCH_VFRSUB_VF, MASK_VFRSUB_VF) +DECLARE_INSN(vfsgnj_vf, MATCH_VFSGNJ_VF, MASK_VFSGNJ_VF) +DECLARE_INSN(vfsgnj_vv, MATCH_VFSGNJ_VV, MASK_VFSGNJ_VV) +DECLARE_INSN(vfsgnjn_vf, MATCH_VFSGNJN_VF, MASK_VFSGNJN_VF) +DECLARE_INSN(vfsgnjn_vv, MATCH_VFSGNJN_VV, MASK_VFSGNJN_VV) +DECLARE_INSN(vfsgnjx_vf, MATCH_VFSGNJX_VF, MASK_VFSGNJX_VF) +DECLARE_INSN(vfsgnjx_vv, MATCH_VFSGNJX_VV, MASK_VFSGNJX_VV) +DECLARE_INSN(vfslide1down_vf, MATCH_VFSLIDE1DOWN_VF, MASK_VFSLIDE1DOWN_VF) +DECLARE_INSN(vfslide1up_vf, MATCH_VFSLIDE1UP_VF, MASK_VFSLIDE1UP_VF) +DECLARE_INSN(vfsqrt_v, MATCH_VFSQRT_V, MASK_VFSQRT_V) +DECLARE_INSN(vfsub_vf, MATCH_VFSUB_VF, MASK_VFSUB_VF) +DECLARE_INSN(vfsub_vv, MATCH_VFSUB_VV, MASK_VFSUB_VV) +DECLARE_INSN(vfwadd_vf, MATCH_VFWADD_VF, MASK_VFWADD_VF) +DECLARE_INSN(vfwadd_vv, MATCH_VFWADD_VV, MASK_VFWADD_VV) +DECLARE_INSN(vfwadd_wf, MATCH_VFWADD_WF, MASK_VFWADD_WF) +DECLARE_INSN(vfwadd_wv, MATCH_VFWADD_WV, MASK_VFWADD_WV) +DECLARE_INSN(vfwcvt_f_f_v, MATCH_VFWCVT_F_F_V, MASK_VFWCVT_F_F_V) +DECLARE_INSN(vfwcvt_f_x_v, MATCH_VFWCVT_F_X_V, MASK_VFWCVT_F_X_V) +DECLARE_INSN(vfwcvt_f_xu_v, MATCH_VFWCVT_F_XU_V, MASK_VFWCVT_F_XU_V) +DECLARE_INSN(vfwcvt_rtz_x_f_v, MATCH_VFWCVT_RTZ_X_F_V, MASK_VFWCVT_RTZ_X_F_V) +DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V) +DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V) +DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V) +DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF) +DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV) +DECLARE_INSN(vfwmsac_vf, MATCH_VFWMSAC_VF, MASK_VFWMSAC_VF) +DECLARE_INSN(vfwmsac_vv, MATCH_VFWMSAC_VV, MASK_VFWMSAC_VV) +DECLARE_INSN(vfwmul_vf, MATCH_VFWMUL_VF, MASK_VFWMUL_VF) +DECLARE_INSN(vfwmul_vv, MATCH_VFWMUL_VV, MASK_VFWMUL_VV) +DECLARE_INSN(vfwnmacc_vf, MATCH_VFWNMACC_VF, MASK_VFWNMACC_VF) +DECLARE_INSN(vfwnmacc_vv, MATCH_VFWNMACC_VV, MASK_VFWNMACC_VV) +DECLARE_INSN(vfwnmsac_vf, MATCH_VFWNMSAC_VF, MASK_VFWNMSAC_VF) +DECLARE_INSN(vfwnmsac_vv, MATCH_VFWNMSAC_VV, MASK_VFWNMSAC_VV) +DECLARE_INSN(vfwredosum_vs, MATCH_VFWREDOSUM_VS, MASK_VFWREDOSUM_VS) +DECLARE_INSN(vfwredusum_vs, MATCH_VFWREDUSUM_VS, MASK_VFWREDUSUM_VS) +DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF) +DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV) +DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF) +DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV) +DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V) +DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M) +DECLARE_INSN(vl1re16_v, MATCH_VL1RE16_V, MASK_VL1RE16_V) +DECLARE_INSN(vl1re32_v, MATCH_VL1RE32_V, MASK_VL1RE32_V) +DECLARE_INSN(vl1re64_v, MATCH_VL1RE64_V, MASK_VL1RE64_V) +DECLARE_INSN(vl1re8_v, MATCH_VL1RE8_V, MASK_VL1RE8_V) +DECLARE_INSN(vl2re16_v, MATCH_VL2RE16_V, MASK_VL2RE16_V) +DECLARE_INSN(vl2re32_v, MATCH_VL2RE32_V, MASK_VL2RE32_V) +DECLARE_INSN(vl2re64_v, MATCH_VL2RE64_V, MASK_VL2RE64_V) +DECLARE_INSN(vl2re8_v, MATCH_VL2RE8_V, MASK_VL2RE8_V) +DECLARE_INSN(vl4re16_v, MATCH_VL4RE16_V, MASK_VL4RE16_V) +DECLARE_INSN(vl4re32_v, MATCH_VL4RE32_V, MASK_VL4RE32_V) +DECLARE_INSN(vl4re64_v, MATCH_VL4RE64_V, MASK_VL4RE64_V) +DECLARE_INSN(vl4re8_v, MATCH_VL4RE8_V, MASK_VL4RE8_V) +DECLARE_INSN(vl8re16_v, MATCH_VL8RE16_V, MASK_VL8RE16_V) +DECLARE_INSN(vl8re32_v, MATCH_VL8RE32_V, MASK_VL8RE32_V) +DECLARE_INSN(vl8re64_v, MATCH_VL8RE64_V, MASK_VL8RE64_V) +DECLARE_INSN(vl8re8_v, MATCH_VL8RE8_V, 
MASK_VL8RE8_V) +DECLARE_INSN(vle1024_v, MATCH_VLE1024_V, MASK_VLE1024_V) +DECLARE_INSN(vle1024ff_v, MATCH_VLE1024FF_V, MASK_VLE1024FF_V) +DECLARE_INSN(vle128_v, MATCH_VLE128_V, MASK_VLE128_V) +DECLARE_INSN(vle128ff_v, MATCH_VLE128FF_V, MASK_VLE128FF_V) +DECLARE_INSN(vle16_v, MATCH_VLE16_V, MASK_VLE16_V) +DECLARE_INSN(vle16ff_v, MATCH_VLE16FF_V, MASK_VLE16FF_V) +DECLARE_INSN(vle256_v, MATCH_VLE256_V, MASK_VLE256_V) +DECLARE_INSN(vle256ff_v, MATCH_VLE256FF_V, MASK_VLE256FF_V) +DECLARE_INSN(vle32_v, MATCH_VLE32_V, MASK_VLE32_V) +DECLARE_INSN(vle32ff_v, MATCH_VLE32FF_V, MASK_VLE32FF_V) +DECLARE_INSN(vle512_v, MATCH_VLE512_V, MASK_VLE512_V) +DECLARE_INSN(vle512ff_v, MATCH_VLE512FF_V, MASK_VLE512FF_V) +DECLARE_INSN(vle64_v, MATCH_VLE64_V, MASK_VLE64_V) +DECLARE_INSN(vle64ff_v, MATCH_VLE64FF_V, MASK_VLE64FF_V) +DECLARE_INSN(vle8_v, MATCH_VLE8_V, MASK_VLE8_V) +DECLARE_INSN(vle8ff_v, MATCH_VLE8FF_V, MASK_VLE8FF_V) +DECLARE_INSN(vlm_v, MATCH_VLM_V, MASK_VLM_V) +DECLARE_INSN(vloxei1024_v, MATCH_VLOXEI1024_V, MASK_VLOXEI1024_V) +DECLARE_INSN(vloxei128_v, MATCH_VLOXEI128_V, MASK_VLOXEI128_V) +DECLARE_INSN(vloxei16_v, MATCH_VLOXEI16_V, MASK_VLOXEI16_V) +DECLARE_INSN(vloxei256_v, MATCH_VLOXEI256_V, MASK_VLOXEI256_V) +DECLARE_INSN(vloxei32_v, MATCH_VLOXEI32_V, MASK_VLOXEI32_V) +DECLARE_INSN(vloxei512_v, MATCH_VLOXEI512_V, MASK_VLOXEI512_V) +DECLARE_INSN(vloxei64_v, MATCH_VLOXEI64_V, MASK_VLOXEI64_V) +DECLARE_INSN(vloxei8_v, MATCH_VLOXEI8_V, MASK_VLOXEI8_V) +DECLARE_INSN(vlse1024_v, MATCH_VLSE1024_V, MASK_VLSE1024_V) +DECLARE_INSN(vlse128_v, MATCH_VLSE128_V, MASK_VLSE128_V) +DECLARE_INSN(vlse16_v, MATCH_VLSE16_V, MASK_VLSE16_V) +DECLARE_INSN(vlse256_v, MATCH_VLSE256_V, MASK_VLSE256_V) +DECLARE_INSN(vlse32_v, MATCH_VLSE32_V, MASK_VLSE32_V) +DECLARE_INSN(vlse512_v, MATCH_VLSE512_V, MASK_VLSE512_V) +DECLARE_INSN(vlse64_v, MATCH_VLSE64_V, MASK_VLSE64_V) +DECLARE_INSN(vlse8_v, MATCH_VLSE8_V, MASK_VLSE8_V) +DECLARE_INSN(vluxei1024_v, MATCH_VLUXEI1024_V, MASK_VLUXEI1024_V) +DECLARE_INSN(vluxei128_v, MATCH_VLUXEI128_V, MASK_VLUXEI128_V) +DECLARE_INSN(vluxei16_v, MATCH_VLUXEI16_V, MASK_VLUXEI16_V) +DECLARE_INSN(vluxei256_v, MATCH_VLUXEI256_V, MASK_VLUXEI256_V) +DECLARE_INSN(vluxei32_v, MATCH_VLUXEI32_V, MASK_VLUXEI32_V) +DECLARE_INSN(vluxei512_v, MATCH_VLUXEI512_V, MASK_VLUXEI512_V) +DECLARE_INSN(vluxei64_v, MATCH_VLUXEI64_V, MASK_VLUXEI64_V) +DECLARE_INSN(vluxei8_v, MATCH_VLUXEI8_V, MASK_VLUXEI8_V) +DECLARE_INSN(vmacc_vv, MATCH_VMACC_VV, MASK_VMACC_VV) +DECLARE_INSN(vmacc_vx, MATCH_VMACC_VX, MASK_VMACC_VX) +DECLARE_INSN(vmadc_vi, MATCH_VMADC_VI, MASK_VMADC_VI) +DECLARE_INSN(vmadc_vim, MATCH_VMADC_VIM, MASK_VMADC_VIM) +DECLARE_INSN(vmadc_vv, MATCH_VMADC_VV, MASK_VMADC_VV) +DECLARE_INSN(vmadc_vvm, MATCH_VMADC_VVM, MASK_VMADC_VVM) +DECLARE_INSN(vmadc_vx, MATCH_VMADC_VX, MASK_VMADC_VX) +DECLARE_INSN(vmadc_vxm, MATCH_VMADC_VXM, MASK_VMADC_VXM) +DECLARE_INSN(vmadd_vv, MATCH_VMADD_VV, MASK_VMADD_VV) +DECLARE_INSN(vmadd_vx, MATCH_VMADD_VX, MASK_VMADD_VX) +DECLARE_INSN(vmand_mm, MATCH_VMAND_MM, MASK_VMAND_MM) +DECLARE_INSN(vmandn_mm, MATCH_VMANDN_MM, MASK_VMANDN_MM) +DECLARE_INSN(vmax_vv, MATCH_VMAX_VV, MASK_VMAX_VV) +DECLARE_INSN(vmax_vx, MATCH_VMAX_VX, MASK_VMAX_VX) +DECLARE_INSN(vmaxu_vv, MATCH_VMAXU_VV, MASK_VMAXU_VV) +DECLARE_INSN(vmaxu_vx, MATCH_VMAXU_VX, MASK_VMAXU_VX) +DECLARE_INSN(vmerge_vim, MATCH_VMERGE_VIM, MASK_VMERGE_VIM) +DECLARE_INSN(vmerge_vvm, MATCH_VMERGE_VVM, MASK_VMERGE_VVM) +DECLARE_INSN(vmerge_vxm, MATCH_VMERGE_VXM, MASK_VMERGE_VXM) +DECLARE_INSN(vmfeq_vf, MATCH_VMFEQ_VF, MASK_VMFEQ_VF) 
+DECLARE_INSN(vmfeq_vv, MATCH_VMFEQ_VV, MASK_VMFEQ_VV) +DECLARE_INSN(vmfge_vf, MATCH_VMFGE_VF, MASK_VMFGE_VF) +DECLARE_INSN(vmfgt_vf, MATCH_VMFGT_VF, MASK_VMFGT_VF) +DECLARE_INSN(vmfle_vf, MATCH_VMFLE_VF, MASK_VMFLE_VF) +DECLARE_INSN(vmfle_vv, MATCH_VMFLE_VV, MASK_VMFLE_VV) +DECLARE_INSN(vmflt_vf, MATCH_VMFLT_VF, MASK_VMFLT_VF) +DECLARE_INSN(vmflt_vv, MATCH_VMFLT_VV, MASK_VMFLT_VV) +DECLARE_INSN(vmfne_vf, MATCH_VMFNE_VF, MASK_VMFNE_VF) +DECLARE_INSN(vmfne_vv, MATCH_VMFNE_VV, MASK_VMFNE_VV) +DECLARE_INSN(vmin_vv, MATCH_VMIN_VV, MASK_VMIN_VV) +DECLARE_INSN(vmin_vx, MATCH_VMIN_VX, MASK_VMIN_VX) +DECLARE_INSN(vminu_vv, MATCH_VMINU_VV, MASK_VMINU_VV) +DECLARE_INSN(vminu_vx, MATCH_VMINU_VX, MASK_VMINU_VX) +DECLARE_INSN(vmnand_mm, MATCH_VMNAND_MM, MASK_VMNAND_MM) +DECLARE_INSN(vmnor_mm, MATCH_VMNOR_MM, MASK_VMNOR_MM) +DECLARE_INSN(vmor_mm, MATCH_VMOR_MM, MASK_VMOR_MM) +DECLARE_INSN(vmorn_mm, MATCH_VMORN_MM, MASK_VMORN_MM) +DECLARE_INSN(vmsbc_vv, MATCH_VMSBC_VV, MASK_VMSBC_VV) +DECLARE_INSN(vmsbc_vvm, MATCH_VMSBC_VVM, MASK_VMSBC_VVM) +DECLARE_INSN(vmsbc_vx, MATCH_VMSBC_VX, MASK_VMSBC_VX) +DECLARE_INSN(vmsbc_vxm, MATCH_VMSBC_VXM, MASK_VMSBC_VXM) +DECLARE_INSN(vmsbf_m, MATCH_VMSBF_M, MASK_VMSBF_M) +DECLARE_INSN(vmseq_vi, MATCH_VMSEQ_VI, MASK_VMSEQ_VI) +DECLARE_INSN(vmseq_vv, MATCH_VMSEQ_VV, MASK_VMSEQ_VV) +DECLARE_INSN(vmseq_vx, MATCH_VMSEQ_VX, MASK_VMSEQ_VX) +DECLARE_INSN(vmsgt_vi, MATCH_VMSGT_VI, MASK_VMSGT_VI) +DECLARE_INSN(vmsgt_vx, MATCH_VMSGT_VX, MASK_VMSGT_VX) +DECLARE_INSN(vmsgtu_vi, MATCH_VMSGTU_VI, MASK_VMSGTU_VI) +DECLARE_INSN(vmsgtu_vx, MATCH_VMSGTU_VX, MASK_VMSGTU_VX) +DECLARE_INSN(vmsif_m, MATCH_VMSIF_M, MASK_VMSIF_M) +DECLARE_INSN(vmsle_vi, MATCH_VMSLE_VI, MASK_VMSLE_VI) +DECLARE_INSN(vmsle_vv, MATCH_VMSLE_VV, MASK_VMSLE_VV) +DECLARE_INSN(vmsle_vx, MATCH_VMSLE_VX, MASK_VMSLE_VX) +DECLARE_INSN(vmsleu_vi, MATCH_VMSLEU_VI, MASK_VMSLEU_VI) +DECLARE_INSN(vmsleu_vv, MATCH_VMSLEU_VV, MASK_VMSLEU_VV) +DECLARE_INSN(vmsleu_vx, MATCH_VMSLEU_VX, MASK_VMSLEU_VX) +DECLARE_INSN(vmslt_vv, MATCH_VMSLT_VV, MASK_VMSLT_VV) +DECLARE_INSN(vmslt_vx, MATCH_VMSLT_VX, MASK_VMSLT_VX) +DECLARE_INSN(vmsltu_vv, MATCH_VMSLTU_VV, MASK_VMSLTU_VV) +DECLARE_INSN(vmsltu_vx, MATCH_VMSLTU_VX, MASK_VMSLTU_VX) +DECLARE_INSN(vmsne_vi, MATCH_VMSNE_VI, MASK_VMSNE_VI) +DECLARE_INSN(vmsne_vv, MATCH_VMSNE_VV, MASK_VMSNE_VV) +DECLARE_INSN(vmsne_vx, MATCH_VMSNE_VX, MASK_VMSNE_VX) +DECLARE_INSN(vmsof_m, MATCH_VMSOF_M, MASK_VMSOF_M) +DECLARE_INSN(vmul_vv, MATCH_VMUL_VV, MASK_VMUL_VV) +DECLARE_INSN(vmul_vx, MATCH_VMUL_VX, MASK_VMUL_VX) +DECLARE_INSN(vmulh_vv, MATCH_VMULH_VV, MASK_VMULH_VV) +DECLARE_INSN(vmulh_vx, MATCH_VMULH_VX, MASK_VMULH_VX) +DECLARE_INSN(vmulhsu_vv, MATCH_VMULHSU_VV, MASK_VMULHSU_VV) +DECLARE_INSN(vmulhsu_vx, MATCH_VMULHSU_VX, MASK_VMULHSU_VX) +DECLARE_INSN(vmulhu_vv, MATCH_VMULHU_VV, MASK_VMULHU_VV) +DECLARE_INSN(vmulhu_vx, MATCH_VMULHU_VX, MASK_VMULHU_VX) +DECLARE_INSN(vmv1r_v, MATCH_VMV1R_V, MASK_VMV1R_V) +DECLARE_INSN(vmv2r_v, MATCH_VMV2R_V, MASK_VMV2R_V) +DECLARE_INSN(vmv4r_v, MATCH_VMV4R_V, MASK_VMV4R_V) +DECLARE_INSN(vmv8r_v, MATCH_VMV8R_V, MASK_VMV8R_V) +DECLARE_INSN(vmv_s_x, MATCH_VMV_S_X, MASK_VMV_S_X) +DECLARE_INSN(vmv_v_i, MATCH_VMV_V_I, MASK_VMV_V_I) +DECLARE_INSN(vmv_v_v, MATCH_VMV_V_V, MASK_VMV_V_V) +DECLARE_INSN(vmv_v_x, MATCH_VMV_V_X, MASK_VMV_V_X) +DECLARE_INSN(vmv_x_s, MATCH_VMV_X_S, MASK_VMV_X_S) +DECLARE_INSN(vmxnor_mm, MATCH_VMXNOR_MM, MASK_VMXNOR_MM) +DECLARE_INSN(vmxor_mm, MATCH_VMXOR_MM, MASK_VMXOR_MM) +DECLARE_INSN(vnclip_wi, MATCH_VNCLIP_WI, MASK_VNCLIP_WI) +DECLARE_INSN(vnclip_wv, 
MATCH_VNCLIP_WV, MASK_VNCLIP_WV) +DECLARE_INSN(vnclip_wx, MATCH_VNCLIP_WX, MASK_VNCLIP_WX) +DECLARE_INSN(vnclipu_wi, MATCH_VNCLIPU_WI, MASK_VNCLIPU_WI) +DECLARE_INSN(vnclipu_wv, MATCH_VNCLIPU_WV, MASK_VNCLIPU_WV) +DECLARE_INSN(vnclipu_wx, MATCH_VNCLIPU_WX, MASK_VNCLIPU_WX) +DECLARE_INSN(vnmsac_vv, MATCH_VNMSAC_VV, MASK_VNMSAC_VV) +DECLARE_INSN(vnmsac_vx, MATCH_VNMSAC_VX, MASK_VNMSAC_VX) +DECLARE_INSN(vnmsub_vv, MATCH_VNMSUB_VV, MASK_VNMSUB_VV) +DECLARE_INSN(vnmsub_vx, MATCH_VNMSUB_VX, MASK_VNMSUB_VX) +DECLARE_INSN(vnsra_wi, MATCH_VNSRA_WI, MASK_VNSRA_WI) +DECLARE_INSN(vnsra_wv, MATCH_VNSRA_WV, MASK_VNSRA_WV) +DECLARE_INSN(vnsra_wx, MATCH_VNSRA_WX, MASK_VNSRA_WX) +DECLARE_INSN(vnsrl_wi, MATCH_VNSRL_WI, MASK_VNSRL_WI) +DECLARE_INSN(vnsrl_wv, MATCH_VNSRL_WV, MASK_VNSRL_WV) +DECLARE_INSN(vnsrl_wx, MATCH_VNSRL_WX, MASK_VNSRL_WX) +DECLARE_INSN(vor_vi, MATCH_VOR_VI, MASK_VOR_VI) +DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV) +DECLARE_INSN(vor_vx, MATCH_VOR_VX, MASK_VOR_VX) +DECLARE_INSN(vredand_vs, MATCH_VREDAND_VS, MASK_VREDAND_VS) +DECLARE_INSN(vredmax_vs, MATCH_VREDMAX_VS, MASK_VREDMAX_VS) +DECLARE_INSN(vredmaxu_vs, MATCH_VREDMAXU_VS, MASK_VREDMAXU_VS) +DECLARE_INSN(vredmin_vs, MATCH_VREDMIN_VS, MASK_VREDMIN_VS) +DECLARE_INSN(vredminu_vs, MATCH_VREDMINU_VS, MASK_VREDMINU_VS) +DECLARE_INSN(vredor_vs, MATCH_VREDOR_VS, MASK_VREDOR_VS) +DECLARE_INSN(vredsum_vs, MATCH_VREDSUM_VS, MASK_VREDSUM_VS) +DECLARE_INSN(vredxor_vs, MATCH_VREDXOR_VS, MASK_VREDXOR_VS) +DECLARE_INSN(vrem_vv, MATCH_VREM_VV, MASK_VREM_VV) +DECLARE_INSN(vrem_vx, MATCH_VREM_VX, MASK_VREM_VX) +DECLARE_INSN(vremu_vv, MATCH_VREMU_VV, MASK_VREMU_VV) +DECLARE_INSN(vremu_vx, MATCH_VREMU_VX, MASK_VREMU_VX) +DECLARE_INSN(vrgather_vi, MATCH_VRGATHER_VI, MASK_VRGATHER_VI) +DECLARE_INSN(vrgather_vv, MATCH_VRGATHER_VV, MASK_VRGATHER_VV) +DECLARE_INSN(vrgather_vx, MATCH_VRGATHER_VX, MASK_VRGATHER_VX) +DECLARE_INSN(vrgatherei16_vv, MATCH_VRGATHEREI16_VV, MASK_VRGATHEREI16_VV) +DECLARE_INSN(vrsub_vi, MATCH_VRSUB_VI, MASK_VRSUB_VI) +DECLARE_INSN(vrsub_vx, MATCH_VRSUB_VX, MASK_VRSUB_VX) +DECLARE_INSN(vs1r_v, MATCH_VS1R_V, MASK_VS1R_V) +DECLARE_INSN(vs2r_v, MATCH_VS2R_V, MASK_VS2R_V) +DECLARE_INSN(vs4r_v, MATCH_VS4R_V, MASK_VS4R_V) +DECLARE_INSN(vs8r_v, MATCH_VS8R_V, MASK_VS8R_V) +DECLARE_INSN(vsadd_vi, MATCH_VSADD_VI, MASK_VSADD_VI) +DECLARE_INSN(vsadd_vv, MATCH_VSADD_VV, MASK_VSADD_VV) +DECLARE_INSN(vsadd_vx, MATCH_VSADD_VX, MASK_VSADD_VX) +DECLARE_INSN(vsaddu_vi, MATCH_VSADDU_VI, MASK_VSADDU_VI) +DECLARE_INSN(vsaddu_vv, MATCH_VSADDU_VV, MASK_VSADDU_VV) +DECLARE_INSN(vsaddu_vx, MATCH_VSADDU_VX, MASK_VSADDU_VX) +DECLARE_INSN(vsbc_vvm, MATCH_VSBC_VVM, MASK_VSBC_VVM) +DECLARE_INSN(vsbc_vxm, MATCH_VSBC_VXM, MASK_VSBC_VXM) +DECLARE_INSN(vse1024_v, MATCH_VSE1024_V, MASK_VSE1024_V) +DECLARE_INSN(vse128_v, MATCH_VSE128_V, MASK_VSE128_V) +DECLARE_INSN(vse16_v, MATCH_VSE16_V, MASK_VSE16_V) +DECLARE_INSN(vse256_v, MATCH_VSE256_V, MASK_VSE256_V) +DECLARE_INSN(vse32_v, MATCH_VSE32_V, MASK_VSE32_V) +DECLARE_INSN(vse512_v, MATCH_VSE512_V, MASK_VSE512_V) +DECLARE_INSN(vse64_v, MATCH_VSE64_V, MASK_VSE64_V) +DECLARE_INSN(vse8_v, MATCH_VSE8_V, MASK_VSE8_V) +DECLARE_INSN(vsetivli, MATCH_VSETIVLI, MASK_VSETIVLI) +DECLARE_INSN(vsetvl, MATCH_VSETVL, MASK_VSETVL) +DECLARE_INSN(vsetvli, MATCH_VSETVLI, MASK_VSETVLI) +DECLARE_INSN(vsext_vf2, MATCH_VSEXT_VF2, MASK_VSEXT_VF2) +DECLARE_INSN(vsext_vf4, MATCH_VSEXT_VF4, MASK_VSEXT_VF4) +DECLARE_INSN(vsext_vf8, MATCH_VSEXT_VF8, MASK_VSEXT_VF8) +DECLARE_INSN(vslide1down_vx, MATCH_VSLIDE1DOWN_VX, MASK_VSLIDE1DOWN_VX) 
+DECLARE_INSN(vslide1up_vx, MATCH_VSLIDE1UP_VX, MASK_VSLIDE1UP_VX) +DECLARE_INSN(vslidedown_vi, MATCH_VSLIDEDOWN_VI, MASK_VSLIDEDOWN_VI) +DECLARE_INSN(vslidedown_vx, MATCH_VSLIDEDOWN_VX, MASK_VSLIDEDOWN_VX) +DECLARE_INSN(vslideup_vi, MATCH_VSLIDEUP_VI, MASK_VSLIDEUP_VI) +DECLARE_INSN(vslideup_vx, MATCH_VSLIDEUP_VX, MASK_VSLIDEUP_VX) +DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI) +DECLARE_INSN(vsll_vv, MATCH_VSLL_VV, MASK_VSLL_VV) +DECLARE_INSN(vsll_vx, MATCH_VSLL_VX, MASK_VSLL_VX) +DECLARE_INSN(vsm_v, MATCH_VSM_V, MASK_VSM_V) +DECLARE_INSN(vsmul_vv, MATCH_VSMUL_VV, MASK_VSMUL_VV) +DECLARE_INSN(vsmul_vx, MATCH_VSMUL_VX, MASK_VSMUL_VX) +DECLARE_INSN(vsoxei1024_v, MATCH_VSOXEI1024_V, MASK_VSOXEI1024_V) +DECLARE_INSN(vsoxei128_v, MATCH_VSOXEI128_V, MASK_VSOXEI128_V) +DECLARE_INSN(vsoxei16_v, MATCH_VSOXEI16_V, MASK_VSOXEI16_V) +DECLARE_INSN(vsoxei256_v, MATCH_VSOXEI256_V, MASK_VSOXEI256_V) +DECLARE_INSN(vsoxei32_v, MATCH_VSOXEI32_V, MASK_VSOXEI32_V) +DECLARE_INSN(vsoxei512_v, MATCH_VSOXEI512_V, MASK_VSOXEI512_V) +DECLARE_INSN(vsoxei64_v, MATCH_VSOXEI64_V, MASK_VSOXEI64_V) +DECLARE_INSN(vsoxei8_v, MATCH_VSOXEI8_V, MASK_VSOXEI8_V) +DECLARE_INSN(vsra_vi, MATCH_VSRA_VI, MASK_VSRA_VI) +DECLARE_INSN(vsra_vv, MATCH_VSRA_VV, MASK_VSRA_VV) +DECLARE_INSN(vsra_vx, MATCH_VSRA_VX, MASK_VSRA_VX) +DECLARE_INSN(vsrl_vi, MATCH_VSRL_VI, MASK_VSRL_VI) +DECLARE_INSN(vsrl_vv, MATCH_VSRL_VV, MASK_VSRL_VV) +DECLARE_INSN(vsrl_vx, MATCH_VSRL_VX, MASK_VSRL_VX) +DECLARE_INSN(vsse1024_v, MATCH_VSSE1024_V, MASK_VSSE1024_V) +DECLARE_INSN(vsse128_v, MATCH_VSSE128_V, MASK_VSSE128_V) +DECLARE_INSN(vsse16_v, MATCH_VSSE16_V, MASK_VSSE16_V) +DECLARE_INSN(vsse256_v, MATCH_VSSE256_V, MASK_VSSE256_V) +DECLARE_INSN(vsse32_v, MATCH_VSSE32_V, MASK_VSSE32_V) +DECLARE_INSN(vsse512_v, MATCH_VSSE512_V, MASK_VSSE512_V) +DECLARE_INSN(vsse64_v, MATCH_VSSE64_V, MASK_VSSE64_V) +DECLARE_INSN(vsse8_v, MATCH_VSSE8_V, MASK_VSSE8_V) +DECLARE_INSN(vssra_vi, MATCH_VSSRA_VI, MASK_VSSRA_VI) +DECLARE_INSN(vssra_vv, MATCH_VSSRA_VV, MASK_VSSRA_VV) +DECLARE_INSN(vssra_vx, MATCH_VSSRA_VX, MASK_VSSRA_VX) +DECLARE_INSN(vssrl_vi, MATCH_VSSRL_VI, MASK_VSSRL_VI) +DECLARE_INSN(vssrl_vv, MATCH_VSSRL_VV, MASK_VSSRL_VV) +DECLARE_INSN(vssrl_vx, MATCH_VSSRL_VX, MASK_VSSRL_VX) +DECLARE_INSN(vssub_vv, MATCH_VSSUB_VV, MASK_VSSUB_VV) +DECLARE_INSN(vssub_vx, MATCH_VSSUB_VX, MASK_VSSUB_VX) +DECLARE_INSN(vssubu_vv, MATCH_VSSUBU_VV, MASK_VSSUBU_VV) +DECLARE_INSN(vssubu_vx, MATCH_VSSUBU_VX, MASK_VSSUBU_VX) +DECLARE_INSN(vsub_vv, MATCH_VSUB_VV, MASK_VSUB_VV) +DECLARE_INSN(vsub_vx, MATCH_VSUB_VX, MASK_VSUB_VX) +DECLARE_INSN(vsuxei1024_v, MATCH_VSUXEI1024_V, MASK_VSUXEI1024_V) +DECLARE_INSN(vsuxei128_v, MATCH_VSUXEI128_V, MASK_VSUXEI128_V) +DECLARE_INSN(vsuxei16_v, MATCH_VSUXEI16_V, MASK_VSUXEI16_V) +DECLARE_INSN(vsuxei256_v, MATCH_VSUXEI256_V, MASK_VSUXEI256_V) +DECLARE_INSN(vsuxei32_v, MATCH_VSUXEI32_V, MASK_VSUXEI32_V) +DECLARE_INSN(vsuxei512_v, MATCH_VSUXEI512_V, MASK_VSUXEI512_V) +DECLARE_INSN(vsuxei64_v, MATCH_VSUXEI64_V, MASK_VSUXEI64_V) +DECLARE_INSN(vsuxei8_v, MATCH_VSUXEI8_V, MASK_VSUXEI8_V) +DECLARE_INSN(vwadd_vv, MATCH_VWADD_VV, MASK_VWADD_VV) +DECLARE_INSN(vwadd_vx, MATCH_VWADD_VX, MASK_VWADD_VX) +DECLARE_INSN(vwadd_wv, MATCH_VWADD_WV, MASK_VWADD_WV) +DECLARE_INSN(vwadd_wx, MATCH_VWADD_WX, MASK_VWADD_WX) +DECLARE_INSN(vwaddu_vv, MATCH_VWADDU_VV, MASK_VWADDU_VV) +DECLARE_INSN(vwaddu_vx, MATCH_VWADDU_VX, MASK_VWADDU_VX) +DECLARE_INSN(vwaddu_wv, MATCH_VWADDU_WV, MASK_VWADDU_WV) +DECLARE_INSN(vwaddu_wx, MATCH_VWADDU_WX, MASK_VWADDU_WX) +DECLARE_INSN(vwmacc_vv, 
MATCH_VWMACC_VV, MASK_VWMACC_VV) +DECLARE_INSN(vwmacc_vx, MATCH_VWMACC_VX, MASK_VWMACC_VX) +DECLARE_INSN(vwmaccsu_vv, MATCH_VWMACCSU_VV, MASK_VWMACCSU_VV) +DECLARE_INSN(vwmaccsu_vx, MATCH_VWMACCSU_VX, MASK_VWMACCSU_VX) +DECLARE_INSN(vwmaccu_vv, MATCH_VWMACCU_VV, MASK_VWMACCU_VV) +DECLARE_INSN(vwmaccu_vx, MATCH_VWMACCU_VX, MASK_VWMACCU_VX) +DECLARE_INSN(vwmaccus_vx, MATCH_VWMACCUS_VX, MASK_VWMACCUS_VX) +DECLARE_INSN(vwmul_vv, MATCH_VWMUL_VV, MASK_VWMUL_VV) +DECLARE_INSN(vwmul_vx, MATCH_VWMUL_VX, MASK_VWMUL_VX) +DECLARE_INSN(vwmulsu_vv, MATCH_VWMULSU_VV, MASK_VWMULSU_VV) +DECLARE_INSN(vwmulsu_vx, MATCH_VWMULSU_VX, MASK_VWMULSU_VX) +DECLARE_INSN(vwmulu_vv, MATCH_VWMULU_VV, MASK_VWMULU_VV) +DECLARE_INSN(vwmulu_vx, MATCH_VWMULU_VX, MASK_VWMULU_VX) +DECLARE_INSN(vwredsum_vs, MATCH_VWREDSUM_VS, MASK_VWREDSUM_VS) +DECLARE_INSN(vwredsumu_vs, MATCH_VWREDSUMU_VS, MASK_VWREDSUMU_VS) +DECLARE_INSN(vwsub_vv, MATCH_VWSUB_VV, MASK_VWSUB_VV) +DECLARE_INSN(vwsub_vx, MATCH_VWSUB_VX, MASK_VWSUB_VX) +DECLARE_INSN(vwsub_wv, MATCH_VWSUB_WV, MASK_VWSUB_WV) +DECLARE_INSN(vwsub_wx, MATCH_VWSUB_WX, MASK_VWSUB_WX) +DECLARE_INSN(vwsubu_vv, MATCH_VWSUBU_VV, MASK_VWSUBU_VV) +DECLARE_INSN(vwsubu_vx, MATCH_VWSUBU_VX, MASK_VWSUBU_VX) +DECLARE_INSN(vwsubu_wv, MATCH_VWSUBU_WV, MASK_VWSUBU_WV) +DECLARE_INSN(vwsubu_wx, MATCH_VWSUBU_WX, MASK_VWSUBU_WX) +DECLARE_INSN(vxor_vi, MATCH_VXOR_VI, MASK_VXOR_VI) +DECLARE_INSN(vxor_vv, MATCH_VXOR_VV, MASK_VXOR_VV) +DECLARE_INSN(vxor_vx, MATCH_VXOR_VX, MASK_VXOR_VX) +DECLARE_INSN(vzext_vf2, MATCH_VZEXT_VF2, MASK_VZEXT_VF2) +DECLARE_INSN(vzext_vf4, MATCH_VZEXT_VF4, MASK_VZEXT_VF4) +DECLARE_INSN(vzext_vf8, MATCH_VZEXT_VF8, MASK_VZEXT_VF8) +DECLARE_INSN(wfi, MATCH_WFI, MASK_WFI) +DECLARE_INSN(wrs_nto, MATCH_WRS_NTO, MASK_WRS_NTO) +DECLARE_INSN(wrs_sto, MATCH_WRS_STO, MASK_WRS_STO) +DECLARE_INSN(xnor, MATCH_XNOR, MASK_XNOR) +DECLARE_INSN(xor, MATCH_XOR, MASK_XOR) +DECLARE_INSN(xori, MATCH_XORI, MASK_XORI) +DECLARE_INSN(xperm16, MATCH_XPERM16, MASK_XPERM16) +DECLARE_INSN(xperm32, MATCH_XPERM32, MASK_XPERM32) +DECLARE_INSN(xperm4, MATCH_XPERM4, MASK_XPERM4) +DECLARE_INSN(xperm8, MATCH_XPERM8, MASK_XPERM8) +DECLARE_INSN(zunpkd810, MATCH_ZUNPKD810, MASK_ZUNPKD810) +DECLARE_INSN(zunpkd820, MATCH_ZUNPKD820, MASK_ZUNPKD820) +DECLARE_INSN(zunpkd830, MATCH_ZUNPKD830, MASK_ZUNPKD830) +DECLARE_INSN(zunpkd831, MATCH_ZUNPKD831, MASK_ZUNPKD831) +DECLARE_INSN(zunpkd832, MATCH_ZUNPKD832, MASK_ZUNPKD832) +#endif +#ifdef DECLARE_CSR +DECLARE_CSR(fflags, CSR_FFLAGS) +DECLARE_CSR(frm, CSR_FRM) +DECLARE_CSR(fcsr, CSR_FCSR) +DECLARE_CSR(vstart, CSR_VSTART) +DECLARE_CSR(vxsat, CSR_VXSAT) +DECLARE_CSR(vxrm, CSR_VXRM) +DECLARE_CSR(vcsr, CSR_VCSR) +DECLARE_CSR(seed, CSR_SEED) +DECLARE_CSR(jvt, CSR_JVT) +DECLARE_CSR(cycle, CSR_CYCLE) +DECLARE_CSR(time, CSR_TIME) +DECLARE_CSR(instret, CSR_INSTRET) +DECLARE_CSR(hpmcounter3, CSR_HPMCOUNTER3) +DECLARE_CSR(hpmcounter4, CSR_HPMCOUNTER4) +DECLARE_CSR(hpmcounter5, CSR_HPMCOUNTER5) +DECLARE_CSR(hpmcounter6, CSR_HPMCOUNTER6) +DECLARE_CSR(hpmcounter7, CSR_HPMCOUNTER7) +DECLARE_CSR(hpmcounter8, CSR_HPMCOUNTER8) +DECLARE_CSR(hpmcounter9, CSR_HPMCOUNTER9) +DECLARE_CSR(hpmcounter10, CSR_HPMCOUNTER10) +DECLARE_CSR(hpmcounter11, CSR_HPMCOUNTER11) +DECLARE_CSR(hpmcounter12, CSR_HPMCOUNTER12) +DECLARE_CSR(hpmcounter13, CSR_HPMCOUNTER13) +DECLARE_CSR(hpmcounter14, CSR_HPMCOUNTER14) +DECLARE_CSR(hpmcounter15, CSR_HPMCOUNTER15) +DECLARE_CSR(hpmcounter16, CSR_HPMCOUNTER16) +DECLARE_CSR(hpmcounter17, CSR_HPMCOUNTER17) +DECLARE_CSR(hpmcounter18, CSR_HPMCOUNTER18) +DECLARE_CSR(hpmcounter19, 
CSR_HPMCOUNTER19) +DECLARE_CSR(hpmcounter20, CSR_HPMCOUNTER20) +DECLARE_CSR(hpmcounter21, CSR_HPMCOUNTER21) +DECLARE_CSR(hpmcounter22, CSR_HPMCOUNTER22) +DECLARE_CSR(hpmcounter23, CSR_HPMCOUNTER23) +DECLARE_CSR(hpmcounter24, CSR_HPMCOUNTER24) +DECLARE_CSR(hpmcounter25, CSR_HPMCOUNTER25) +DECLARE_CSR(hpmcounter26, CSR_HPMCOUNTER26) +DECLARE_CSR(hpmcounter27, CSR_HPMCOUNTER27) +DECLARE_CSR(hpmcounter28, CSR_HPMCOUNTER28) +DECLARE_CSR(hpmcounter29, CSR_HPMCOUNTER29) +DECLARE_CSR(hpmcounter30, CSR_HPMCOUNTER30) +DECLARE_CSR(hpmcounter31, CSR_HPMCOUNTER31) +DECLARE_CSR(vl, CSR_VL) +DECLARE_CSR(vtype, CSR_VTYPE) +DECLARE_CSR(vlenb, CSR_VLENB) +DECLARE_CSR(sstatus, CSR_SSTATUS) +DECLARE_CSR(sedeleg, CSR_SEDELEG) +DECLARE_CSR(sideleg, CSR_SIDELEG) +DECLARE_CSR(sie, CSR_SIE) +DECLARE_CSR(stvec, CSR_STVEC) +DECLARE_CSR(scounteren, CSR_SCOUNTEREN) +DECLARE_CSR(senvcfg, CSR_SENVCFG) +DECLARE_CSR(sstateen0, CSR_SSTATEEN0) +DECLARE_CSR(sstateen1, CSR_SSTATEEN1) +DECLARE_CSR(sstateen2, CSR_SSTATEEN2) +DECLARE_CSR(sstateen3, CSR_SSTATEEN3) +DECLARE_CSR(sscratch, CSR_SSCRATCH) +DECLARE_CSR(sepc, CSR_SEPC) +DECLARE_CSR(scause, CSR_SCAUSE) +DECLARE_CSR(stval, CSR_STVAL) +DECLARE_CSR(sip, CSR_SIP) +DECLARE_CSR(stimecmp, CSR_STIMECMP) +DECLARE_CSR(siselect, CSR_SISELECT) +DECLARE_CSR(sireg, CSR_SIREG) +DECLARE_CSR(stopei, CSR_STOPEI) +DECLARE_CSR(satp, CSR_SATP) +DECLARE_CSR(scontext, CSR_SCONTEXT) +DECLARE_CSR(vsstatus, CSR_VSSTATUS) +DECLARE_CSR(vsie, CSR_VSIE) +DECLARE_CSR(vstvec, CSR_VSTVEC) +DECLARE_CSR(vsscratch, CSR_VSSCRATCH) +DECLARE_CSR(vsepc, CSR_VSEPC) +DECLARE_CSR(vscause, CSR_VSCAUSE) +DECLARE_CSR(vstval, CSR_VSTVAL) +DECLARE_CSR(vsip, CSR_VSIP) +DECLARE_CSR(vstimecmp, CSR_VSTIMECMP) +DECLARE_CSR(vsiselect, CSR_VSISELECT) +DECLARE_CSR(vsireg, CSR_VSIREG) +DECLARE_CSR(vstopei, CSR_VSTOPEI) +DECLARE_CSR(vsatp, CSR_VSATP) +DECLARE_CSR(hstatus, CSR_HSTATUS) +DECLARE_CSR(hedeleg, CSR_HEDELEG) +DECLARE_CSR(hideleg, CSR_HIDELEG) +DECLARE_CSR(hie, CSR_HIE) +DECLARE_CSR(htimedelta, CSR_HTIMEDELTA) +DECLARE_CSR(hcounteren, CSR_HCOUNTEREN) +DECLARE_CSR(hgeie, CSR_HGEIE) +DECLARE_CSR(hvien, CSR_HVIEN) +DECLARE_CSR(hvictl, CSR_HVICTL) +DECLARE_CSR(henvcfg, CSR_HENVCFG) +DECLARE_CSR(hstateen0, CSR_HSTATEEN0) +DECLARE_CSR(hstateen1, CSR_HSTATEEN1) +DECLARE_CSR(hstateen2, CSR_HSTATEEN2) +DECLARE_CSR(hstateen3, CSR_HSTATEEN3) +DECLARE_CSR(htval, CSR_HTVAL) +DECLARE_CSR(hip, CSR_HIP) +DECLARE_CSR(hvip, CSR_HVIP) +DECLARE_CSR(hviprio1, CSR_HVIPRIO1) +DECLARE_CSR(hviprio2, CSR_HVIPRIO2) +DECLARE_CSR(htinst, CSR_HTINST) +DECLARE_CSR(hgatp, CSR_HGATP) +DECLARE_CSR(hcontext, CSR_HCONTEXT) +DECLARE_CSR(hgeip, CSR_HGEIP) +DECLARE_CSR(vstopi, CSR_VSTOPI) +DECLARE_CSR(scountovf, CSR_SCOUNTOVF) +DECLARE_CSR(stopi, CSR_STOPI) +DECLARE_CSR(utvt, CSR_UTVT) +DECLARE_CSR(unxti, CSR_UNXTI) +DECLARE_CSR(uintstatus, CSR_UINTSTATUS) +DECLARE_CSR(uscratchcsw, CSR_USCRATCHCSW) +DECLARE_CSR(uscratchcswl, CSR_USCRATCHCSWL) +DECLARE_CSR(stvt, CSR_STVT) +DECLARE_CSR(snxti, CSR_SNXTI) +DECLARE_CSR(sintstatus, CSR_SINTSTATUS) +DECLARE_CSR(sscratchcsw, CSR_SSCRATCHCSW) +DECLARE_CSR(sscratchcswl, CSR_SSCRATCHCSWL) +DECLARE_CSR(mtvt, CSR_MTVT) +DECLARE_CSR(mnxti, CSR_MNXTI) +DECLARE_CSR(mintstatus, CSR_MINTSTATUS) +DECLARE_CSR(mscratchcsw, CSR_MSCRATCHCSW) +DECLARE_CSR(mscratchcswl, CSR_MSCRATCHCSWL) +DECLARE_CSR(mstatus, CSR_MSTATUS) +DECLARE_CSR(misa, CSR_MISA) +DECLARE_CSR(medeleg, CSR_MEDELEG) +DECLARE_CSR(mideleg, CSR_MIDELEG) +DECLARE_CSR(mie, CSR_MIE) +DECLARE_CSR(mtvec, CSR_MTVEC) +DECLARE_CSR(mcounteren, CSR_MCOUNTEREN) 
+DECLARE_CSR(mvien, CSR_MVIEN) +DECLARE_CSR(mvip, CSR_MVIP) +DECLARE_CSR(menvcfg, CSR_MENVCFG) +DECLARE_CSR(mstateen0, CSR_MSTATEEN0) +DECLARE_CSR(mstateen1, CSR_MSTATEEN1) +DECLARE_CSR(mstateen2, CSR_MSTATEEN2) +DECLARE_CSR(mstateen3, CSR_MSTATEEN3) +DECLARE_CSR(mcountinhibit, CSR_MCOUNTINHIBIT) +DECLARE_CSR(mscratch, CSR_MSCRATCH) +DECLARE_CSR(mepc, CSR_MEPC) +DECLARE_CSR(mcause, CSR_MCAUSE) +DECLARE_CSR(mtval, CSR_MTVAL) +DECLARE_CSR(mip, CSR_MIP) +DECLARE_CSR(mtinst, CSR_MTINST) +DECLARE_CSR(mtval2, CSR_MTVAL2) +DECLARE_CSR(miselect, CSR_MISELECT) +DECLARE_CSR(mireg, CSR_MIREG) +DECLARE_CSR(mtopei, CSR_MTOPEI) +DECLARE_CSR(pmpcfg0, CSR_PMPCFG0) +DECLARE_CSR(pmpcfg1, CSR_PMPCFG1) +DECLARE_CSR(pmpcfg2, CSR_PMPCFG2) +DECLARE_CSR(pmpcfg3, CSR_PMPCFG3) +DECLARE_CSR(pmpcfg4, CSR_PMPCFG4) +DECLARE_CSR(pmpcfg5, CSR_PMPCFG5) +DECLARE_CSR(pmpcfg6, CSR_PMPCFG6) +DECLARE_CSR(pmpcfg7, CSR_PMPCFG7) +DECLARE_CSR(pmpcfg8, CSR_PMPCFG8) +DECLARE_CSR(pmpcfg9, CSR_PMPCFG9) +DECLARE_CSR(pmpcfg10, CSR_PMPCFG10) +DECLARE_CSR(pmpcfg11, CSR_PMPCFG11) +DECLARE_CSR(pmpcfg12, CSR_PMPCFG12) +DECLARE_CSR(pmpcfg13, CSR_PMPCFG13) +DECLARE_CSR(pmpcfg14, CSR_PMPCFG14) +DECLARE_CSR(pmpcfg15, CSR_PMPCFG15) +DECLARE_CSR(pmpaddr0, CSR_PMPADDR0) +DECLARE_CSR(pmpaddr1, CSR_PMPADDR1) +DECLARE_CSR(pmpaddr2, CSR_PMPADDR2) +DECLARE_CSR(pmpaddr3, CSR_PMPADDR3) +DECLARE_CSR(pmpaddr4, CSR_PMPADDR4) +DECLARE_CSR(pmpaddr5, CSR_PMPADDR5) +DECLARE_CSR(pmpaddr6, CSR_PMPADDR6) +DECLARE_CSR(pmpaddr7, CSR_PMPADDR7) +DECLARE_CSR(pmpaddr8, CSR_PMPADDR8) +DECLARE_CSR(pmpaddr9, CSR_PMPADDR9) +DECLARE_CSR(pmpaddr10, CSR_PMPADDR10) +DECLARE_CSR(pmpaddr11, CSR_PMPADDR11) +DECLARE_CSR(pmpaddr12, CSR_PMPADDR12) +DECLARE_CSR(pmpaddr13, CSR_PMPADDR13) +DECLARE_CSR(pmpaddr14, CSR_PMPADDR14) +DECLARE_CSR(pmpaddr15, CSR_PMPADDR15) +DECLARE_CSR(pmpaddr16, CSR_PMPADDR16) +DECLARE_CSR(pmpaddr17, CSR_PMPADDR17) +DECLARE_CSR(pmpaddr18, CSR_PMPADDR18) +DECLARE_CSR(pmpaddr19, CSR_PMPADDR19) +DECLARE_CSR(pmpaddr20, CSR_PMPADDR20) +DECLARE_CSR(pmpaddr21, CSR_PMPADDR21) +DECLARE_CSR(pmpaddr22, CSR_PMPADDR22) +DECLARE_CSR(pmpaddr23, CSR_PMPADDR23) +DECLARE_CSR(pmpaddr24, CSR_PMPADDR24) +DECLARE_CSR(pmpaddr25, CSR_PMPADDR25) +DECLARE_CSR(pmpaddr26, CSR_PMPADDR26) +DECLARE_CSR(pmpaddr27, CSR_PMPADDR27) +DECLARE_CSR(pmpaddr28, CSR_PMPADDR28) +DECLARE_CSR(pmpaddr29, CSR_PMPADDR29) +DECLARE_CSR(pmpaddr30, CSR_PMPADDR30) +DECLARE_CSR(pmpaddr31, CSR_PMPADDR31) +DECLARE_CSR(pmpaddr32, CSR_PMPADDR32) +DECLARE_CSR(pmpaddr33, CSR_PMPADDR33) +DECLARE_CSR(pmpaddr34, CSR_PMPADDR34) +DECLARE_CSR(pmpaddr35, CSR_PMPADDR35) +DECLARE_CSR(pmpaddr36, CSR_PMPADDR36) +DECLARE_CSR(pmpaddr37, CSR_PMPADDR37) +DECLARE_CSR(pmpaddr38, CSR_PMPADDR38) +DECLARE_CSR(pmpaddr39, CSR_PMPADDR39) +DECLARE_CSR(pmpaddr40, CSR_PMPADDR40) +DECLARE_CSR(pmpaddr41, CSR_PMPADDR41) +DECLARE_CSR(pmpaddr42, CSR_PMPADDR42) +DECLARE_CSR(pmpaddr43, CSR_PMPADDR43) +DECLARE_CSR(pmpaddr44, CSR_PMPADDR44) +DECLARE_CSR(pmpaddr45, CSR_PMPADDR45) +DECLARE_CSR(pmpaddr46, CSR_PMPADDR46) +DECLARE_CSR(pmpaddr47, CSR_PMPADDR47) +DECLARE_CSR(pmpaddr48, CSR_PMPADDR48) +DECLARE_CSR(pmpaddr49, CSR_PMPADDR49) +DECLARE_CSR(pmpaddr50, CSR_PMPADDR50) +DECLARE_CSR(pmpaddr51, CSR_PMPADDR51) +DECLARE_CSR(pmpaddr52, CSR_PMPADDR52) +DECLARE_CSR(pmpaddr53, CSR_PMPADDR53) +DECLARE_CSR(pmpaddr54, CSR_PMPADDR54) +DECLARE_CSR(pmpaddr55, CSR_PMPADDR55) +DECLARE_CSR(pmpaddr56, CSR_PMPADDR56) +DECLARE_CSR(pmpaddr57, CSR_PMPADDR57) +DECLARE_CSR(pmpaddr58, CSR_PMPADDR58) +DECLARE_CSR(pmpaddr59, CSR_PMPADDR59) +DECLARE_CSR(pmpaddr60, CSR_PMPADDR60) 
+DECLARE_CSR(pmpaddr61, CSR_PMPADDR61) +DECLARE_CSR(pmpaddr62, CSR_PMPADDR62) +DECLARE_CSR(pmpaddr63, CSR_PMPADDR63) +DECLARE_CSR(mseccfg, CSR_MSECCFG) +DECLARE_CSR(tselect, CSR_TSELECT) +DECLARE_CSR(tdata1, CSR_TDATA1) +DECLARE_CSR(tdata2, CSR_TDATA2) +DECLARE_CSR(tdata3, CSR_TDATA3) +DECLARE_CSR(tinfo, CSR_TINFO) +DECLARE_CSR(tcontrol, CSR_TCONTROL) +DECLARE_CSR(mcontext, CSR_MCONTEXT) +DECLARE_CSR(mscontext, CSR_MSCONTEXT) +DECLARE_CSR(dcsr, CSR_DCSR) +DECLARE_CSR(dpc, CSR_DPC) +DECLARE_CSR(dscratch0, CSR_DSCRATCH0) +DECLARE_CSR(dscratch1, CSR_DSCRATCH1) +DECLARE_CSR(mcycle, CSR_MCYCLE) +DECLARE_CSR(minstret, CSR_MINSTRET) +DECLARE_CSR(mhpmcounter3, CSR_MHPMCOUNTER3) +DECLARE_CSR(mhpmcounter4, CSR_MHPMCOUNTER4) +DECLARE_CSR(mhpmcounter5, CSR_MHPMCOUNTER5) +DECLARE_CSR(mhpmcounter6, CSR_MHPMCOUNTER6) +DECLARE_CSR(mhpmcounter7, CSR_MHPMCOUNTER7) +DECLARE_CSR(mhpmcounter8, CSR_MHPMCOUNTER8) +DECLARE_CSR(mhpmcounter9, CSR_MHPMCOUNTER9) +DECLARE_CSR(mhpmcounter10, CSR_MHPMCOUNTER10) +DECLARE_CSR(mhpmcounter11, CSR_MHPMCOUNTER11) +DECLARE_CSR(mhpmcounter12, CSR_MHPMCOUNTER12) +DECLARE_CSR(mhpmcounter13, CSR_MHPMCOUNTER13) +DECLARE_CSR(mhpmcounter14, CSR_MHPMCOUNTER14) +DECLARE_CSR(mhpmcounter15, CSR_MHPMCOUNTER15) +DECLARE_CSR(mhpmcounter16, CSR_MHPMCOUNTER16) +DECLARE_CSR(mhpmcounter17, CSR_MHPMCOUNTER17) +DECLARE_CSR(mhpmcounter18, CSR_MHPMCOUNTER18) +DECLARE_CSR(mhpmcounter19, CSR_MHPMCOUNTER19) +DECLARE_CSR(mhpmcounter20, CSR_MHPMCOUNTER20) +DECLARE_CSR(mhpmcounter21, CSR_MHPMCOUNTER21) +DECLARE_CSR(mhpmcounter22, CSR_MHPMCOUNTER22) +DECLARE_CSR(mhpmcounter23, CSR_MHPMCOUNTER23) +DECLARE_CSR(mhpmcounter24, CSR_MHPMCOUNTER24) +DECLARE_CSR(mhpmcounter25, CSR_MHPMCOUNTER25) +DECLARE_CSR(mhpmcounter26, CSR_MHPMCOUNTER26) +DECLARE_CSR(mhpmcounter27, CSR_MHPMCOUNTER27) +DECLARE_CSR(mhpmcounter28, CSR_MHPMCOUNTER28) +DECLARE_CSR(mhpmcounter29, CSR_MHPMCOUNTER29) +DECLARE_CSR(mhpmcounter30, CSR_MHPMCOUNTER30) +DECLARE_CSR(mhpmcounter31, CSR_MHPMCOUNTER31) +DECLARE_CSR(mhpmevent3, CSR_MHPMEVENT3) +DECLARE_CSR(mhpmevent4, CSR_MHPMEVENT4) +DECLARE_CSR(mhpmevent5, CSR_MHPMEVENT5) +DECLARE_CSR(mhpmevent6, CSR_MHPMEVENT6) +DECLARE_CSR(mhpmevent7, CSR_MHPMEVENT7) +DECLARE_CSR(mhpmevent8, CSR_MHPMEVENT8) +DECLARE_CSR(mhpmevent9, CSR_MHPMEVENT9) +DECLARE_CSR(mhpmevent10, CSR_MHPMEVENT10) +DECLARE_CSR(mhpmevent11, CSR_MHPMEVENT11) +DECLARE_CSR(mhpmevent12, CSR_MHPMEVENT12) +DECLARE_CSR(mhpmevent13, CSR_MHPMEVENT13) +DECLARE_CSR(mhpmevent14, CSR_MHPMEVENT14) +DECLARE_CSR(mhpmevent15, CSR_MHPMEVENT15) +DECLARE_CSR(mhpmevent16, CSR_MHPMEVENT16) +DECLARE_CSR(mhpmevent17, CSR_MHPMEVENT17) +DECLARE_CSR(mhpmevent18, CSR_MHPMEVENT18) +DECLARE_CSR(mhpmevent19, CSR_MHPMEVENT19) +DECLARE_CSR(mhpmevent20, CSR_MHPMEVENT20) +DECLARE_CSR(mhpmevent21, CSR_MHPMEVENT21) +DECLARE_CSR(mhpmevent22, CSR_MHPMEVENT22) +DECLARE_CSR(mhpmevent23, CSR_MHPMEVENT23) +DECLARE_CSR(mhpmevent24, CSR_MHPMEVENT24) +DECLARE_CSR(mhpmevent25, CSR_MHPMEVENT25) +DECLARE_CSR(mhpmevent26, CSR_MHPMEVENT26) +DECLARE_CSR(mhpmevent27, CSR_MHPMEVENT27) +DECLARE_CSR(mhpmevent28, CSR_MHPMEVENT28) +DECLARE_CSR(mhpmevent29, CSR_MHPMEVENT29) +DECLARE_CSR(mhpmevent30, CSR_MHPMEVENT30) +DECLARE_CSR(mhpmevent31, CSR_MHPMEVENT31) +DECLARE_CSR(mvendorid, CSR_MVENDORID) +DECLARE_CSR(marchid, CSR_MARCHID) +DECLARE_CSR(mimpid, CSR_MIMPID) +DECLARE_CSR(mhartid, CSR_MHARTID) +DECLARE_CSR(mconfigptr, CSR_MCONFIGPTR) +DECLARE_CSR(mtopi, CSR_MTOPI) +DECLARE_CSR(sieh, CSR_SIEH) +DECLARE_CSR(siph, CSR_SIPH) +DECLARE_CSR(stimecmph, CSR_STIMECMPH) +DECLARE_CSR(vsieh, 
CSR_VSIEH) +DECLARE_CSR(vsiph, CSR_VSIPH) +DECLARE_CSR(vstimecmph, CSR_VSTIMECMPH) +DECLARE_CSR(htimedeltah, CSR_HTIMEDELTAH) +DECLARE_CSR(hidelegh, CSR_HIDELEGH) +DECLARE_CSR(hvienh, CSR_HVIENH) +DECLARE_CSR(henvcfgh, CSR_HENVCFGH) +DECLARE_CSR(hviph, CSR_HVIPH) +DECLARE_CSR(hviprio1h, CSR_HVIPRIO1H) +DECLARE_CSR(hviprio2h, CSR_HVIPRIO2H) +DECLARE_CSR(hstateen0h, CSR_HSTATEEN0H) +DECLARE_CSR(hstateen1h, CSR_HSTATEEN1H) +DECLARE_CSR(hstateen2h, CSR_HSTATEEN2H) +DECLARE_CSR(hstateen3h, CSR_HSTATEEN3H) +DECLARE_CSR(cycleh, CSR_CYCLEH) +DECLARE_CSR(timeh, CSR_TIMEH) +DECLARE_CSR(instreth, CSR_INSTRETH) +DECLARE_CSR(hpmcounter3h, CSR_HPMCOUNTER3H) +DECLARE_CSR(hpmcounter4h, CSR_HPMCOUNTER4H) +DECLARE_CSR(hpmcounter5h, CSR_HPMCOUNTER5H) +DECLARE_CSR(hpmcounter6h, CSR_HPMCOUNTER6H) +DECLARE_CSR(hpmcounter7h, CSR_HPMCOUNTER7H) +DECLARE_CSR(hpmcounter8h, CSR_HPMCOUNTER8H) +DECLARE_CSR(hpmcounter9h, CSR_HPMCOUNTER9H) +DECLARE_CSR(hpmcounter10h, CSR_HPMCOUNTER10H) +DECLARE_CSR(hpmcounter11h, CSR_HPMCOUNTER11H) +DECLARE_CSR(hpmcounter12h, CSR_HPMCOUNTER12H) +DECLARE_CSR(hpmcounter13h, CSR_HPMCOUNTER13H) +DECLARE_CSR(hpmcounter14h, CSR_HPMCOUNTER14H) +DECLARE_CSR(hpmcounter15h, CSR_HPMCOUNTER15H) +DECLARE_CSR(hpmcounter16h, CSR_HPMCOUNTER16H) +DECLARE_CSR(hpmcounter17h, CSR_HPMCOUNTER17H) +DECLARE_CSR(hpmcounter18h, CSR_HPMCOUNTER18H) +DECLARE_CSR(hpmcounter19h, CSR_HPMCOUNTER19H) +DECLARE_CSR(hpmcounter20h, CSR_HPMCOUNTER20H) +DECLARE_CSR(hpmcounter21h, CSR_HPMCOUNTER21H) +DECLARE_CSR(hpmcounter22h, CSR_HPMCOUNTER22H) +DECLARE_CSR(hpmcounter23h, CSR_HPMCOUNTER23H) +DECLARE_CSR(hpmcounter24h, CSR_HPMCOUNTER24H) +DECLARE_CSR(hpmcounter25h, CSR_HPMCOUNTER25H) +DECLARE_CSR(hpmcounter26h, CSR_HPMCOUNTER26H) +DECLARE_CSR(hpmcounter27h, CSR_HPMCOUNTER27H) +DECLARE_CSR(hpmcounter28h, CSR_HPMCOUNTER28H) +DECLARE_CSR(hpmcounter29h, CSR_HPMCOUNTER29H) +DECLARE_CSR(hpmcounter30h, CSR_HPMCOUNTER30H) +DECLARE_CSR(hpmcounter31h, CSR_HPMCOUNTER31H) +DECLARE_CSR(mstatush, CSR_MSTATUSH) +DECLARE_CSR(midelegh, CSR_MIDELEGH) +DECLARE_CSR(mieh, CSR_MIEH) +DECLARE_CSR(mvienh, CSR_MVIENH) +DECLARE_CSR(mviph, CSR_MVIPH) +DECLARE_CSR(menvcfgh, CSR_MENVCFGH) +DECLARE_CSR(mstateen0h, CSR_MSTATEEN0H) +DECLARE_CSR(mstateen1h, CSR_MSTATEEN1H) +DECLARE_CSR(mstateen2h, CSR_MSTATEEN2H) +DECLARE_CSR(mstateen3h, CSR_MSTATEEN3H) +DECLARE_CSR(miph, CSR_MIPH) +DECLARE_CSR(mhpmevent3h, CSR_MHPMEVENT3H) +DECLARE_CSR(mhpmevent4h, CSR_MHPMEVENT4H) +DECLARE_CSR(mhpmevent5h, CSR_MHPMEVENT5H) +DECLARE_CSR(mhpmevent6h, CSR_MHPMEVENT6H) +DECLARE_CSR(mhpmevent7h, CSR_MHPMEVENT7H) +DECLARE_CSR(mhpmevent8h, CSR_MHPMEVENT8H) +DECLARE_CSR(mhpmevent9h, CSR_MHPMEVENT9H) +DECLARE_CSR(mhpmevent10h, CSR_MHPMEVENT10H) +DECLARE_CSR(mhpmevent11h, CSR_MHPMEVENT11H) +DECLARE_CSR(mhpmevent12h, CSR_MHPMEVENT12H) +DECLARE_CSR(mhpmevent13h, CSR_MHPMEVENT13H) +DECLARE_CSR(mhpmevent14h, CSR_MHPMEVENT14H) +DECLARE_CSR(mhpmevent15h, CSR_MHPMEVENT15H) +DECLARE_CSR(mhpmevent16h, CSR_MHPMEVENT16H) +DECLARE_CSR(mhpmevent17h, CSR_MHPMEVENT17H) +DECLARE_CSR(mhpmevent18h, CSR_MHPMEVENT18H) +DECLARE_CSR(mhpmevent19h, CSR_MHPMEVENT19H) +DECLARE_CSR(mhpmevent20h, CSR_MHPMEVENT20H) +DECLARE_CSR(mhpmevent21h, CSR_MHPMEVENT21H) +DECLARE_CSR(mhpmevent22h, CSR_MHPMEVENT22H) +DECLARE_CSR(mhpmevent23h, CSR_MHPMEVENT23H) +DECLARE_CSR(mhpmevent24h, CSR_MHPMEVENT24H) +DECLARE_CSR(mhpmevent25h, CSR_MHPMEVENT25H) +DECLARE_CSR(mhpmevent26h, CSR_MHPMEVENT26H) +DECLARE_CSR(mhpmevent27h, CSR_MHPMEVENT27H) +DECLARE_CSR(mhpmevent28h, CSR_MHPMEVENT28H) +DECLARE_CSR(mhpmevent29h, CSR_MHPMEVENT29H) 
+DECLARE_CSR(mhpmevent30h, CSR_MHPMEVENT30H) +DECLARE_CSR(mhpmevent31h, CSR_MHPMEVENT31H) +DECLARE_CSR(mnscratch, CSR_MNSCRATCH) +DECLARE_CSR(mnepc, CSR_MNEPC) +DECLARE_CSR(mncause, CSR_MNCAUSE) +DECLARE_CSR(mnstatus, CSR_MNSTATUS) +DECLARE_CSR(mseccfgh, CSR_MSECCFGH) +DECLARE_CSR(mcycleh, CSR_MCYCLEH) +DECLARE_CSR(minstreth, CSR_MINSTRETH) +DECLARE_CSR(mhpmcounter3h, CSR_MHPMCOUNTER3H) +DECLARE_CSR(mhpmcounter4h, CSR_MHPMCOUNTER4H) +DECLARE_CSR(mhpmcounter5h, CSR_MHPMCOUNTER5H) +DECLARE_CSR(mhpmcounter6h, CSR_MHPMCOUNTER6H) +DECLARE_CSR(mhpmcounter7h, CSR_MHPMCOUNTER7H) +DECLARE_CSR(mhpmcounter8h, CSR_MHPMCOUNTER8H) +DECLARE_CSR(mhpmcounter9h, CSR_MHPMCOUNTER9H) +DECLARE_CSR(mhpmcounter10h, CSR_MHPMCOUNTER10H) +DECLARE_CSR(mhpmcounter11h, CSR_MHPMCOUNTER11H) +DECLARE_CSR(mhpmcounter12h, CSR_MHPMCOUNTER12H) +DECLARE_CSR(mhpmcounter13h, CSR_MHPMCOUNTER13H) +DECLARE_CSR(mhpmcounter14h, CSR_MHPMCOUNTER14H) +DECLARE_CSR(mhpmcounter15h, CSR_MHPMCOUNTER15H) +DECLARE_CSR(mhpmcounter16h, CSR_MHPMCOUNTER16H) +DECLARE_CSR(mhpmcounter17h, CSR_MHPMCOUNTER17H) +DECLARE_CSR(mhpmcounter18h, CSR_MHPMCOUNTER18H) +DECLARE_CSR(mhpmcounter19h, CSR_MHPMCOUNTER19H) +DECLARE_CSR(mhpmcounter20h, CSR_MHPMCOUNTER20H) +DECLARE_CSR(mhpmcounter21h, CSR_MHPMCOUNTER21H) +DECLARE_CSR(mhpmcounter22h, CSR_MHPMCOUNTER22H) +DECLARE_CSR(mhpmcounter23h, CSR_MHPMCOUNTER23H) +DECLARE_CSR(mhpmcounter24h, CSR_MHPMCOUNTER24H) +DECLARE_CSR(mhpmcounter25h, CSR_MHPMCOUNTER25H) +DECLARE_CSR(mhpmcounter26h, CSR_MHPMCOUNTER26H) +DECLARE_CSR(mhpmcounter27h, CSR_MHPMCOUNTER27H) +DECLARE_CSR(mhpmcounter28h, CSR_MHPMCOUNTER28H) +DECLARE_CSR(mhpmcounter29h, CSR_MHPMCOUNTER29H) +DECLARE_CSR(mhpmcounter30h, CSR_MHPMCOUNTER30H) +DECLARE_CSR(mhpmcounter31h, CSR_MHPMCOUNTER31H) +#endif +#ifdef DECLARE_CAUSE +DECLARE_CAUSE("misaligned fetch", CAUSE_MISALIGNED_FETCH) +DECLARE_CAUSE("fetch access", CAUSE_FETCH_ACCESS) +DECLARE_CAUSE("illegal instruction", CAUSE_ILLEGAL_INSTRUCTION) +DECLARE_CAUSE("breakpoint", CAUSE_BREAKPOINT) +DECLARE_CAUSE("misaligned load", CAUSE_MISALIGNED_LOAD) +DECLARE_CAUSE("load access", CAUSE_LOAD_ACCESS) +DECLARE_CAUSE("misaligned store", CAUSE_MISALIGNED_STORE) +DECLARE_CAUSE("store access", CAUSE_STORE_ACCESS) +DECLARE_CAUSE("user_ecall", CAUSE_USER_ECALL) +DECLARE_CAUSE("supervisor_ecall", CAUSE_SUPERVISOR_ECALL) +DECLARE_CAUSE("virtual_supervisor_ecall", CAUSE_VIRTUAL_SUPERVISOR_ECALL) +DECLARE_CAUSE("machine_ecall", CAUSE_MACHINE_ECALL) +DECLARE_CAUSE("fetch page fault", CAUSE_FETCH_PAGE_FAULT) +DECLARE_CAUSE("load page fault", CAUSE_LOAD_PAGE_FAULT) +DECLARE_CAUSE("store page fault", CAUSE_STORE_PAGE_FAULT) +DECLARE_CAUSE("fetch guest page fault", CAUSE_FETCH_GUEST_PAGE_FAULT) +DECLARE_CAUSE("load guest page fault", CAUSE_LOAD_GUEST_PAGE_FAULT) +DECLARE_CAUSE("virtual instruction", CAUSE_VIRTUAL_INSTRUCTION) +DECLARE_CAUSE("store guest page fault", CAUSE_STORE_GUEST_PAGE_FAULT) +#endif diff --git a/tests/riscv-test-env/p/link.ld b/tests/riscv-test-env/p/link.ld new file mode 100644 index 000000000..b3e315e78 --- /dev/null +++ b/tests/riscv-test-env/p/link.ld @@ -0,0 +1,17 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS +{ + . = 0x80000000; + .text.init : { *(.text.init) } + . = ALIGN(0x1000); + .tohost : { *(.tohost) } + . = ALIGN(0x1000); + .text : { *(.text) } + . 
= ALIGN(0x1000); + .data : { *(.data) } + .bss : { *(.bss) } + _end = .; +} + diff --git a/tests/riscv-test-env/p/riscv_test.h b/tests/riscv-test-env/p/riscv_test.h new file mode 100644 index 000000000..e8f4de339 --- /dev/null +++ b/tests/riscv-test-env/p/riscv_test.h @@ -0,0 +1,282 @@ +// See LICENSE for license details. + +#ifndef _ENV_PHYSICAL_SINGLE_CORE_H +#define _ENV_PHYSICAL_SINGLE_CORE_H + +#include "../encoding.h" + +//----------------------------------------------------------------------- +// Begin Macro +//----------------------------------------------------------------------- + +#define RVTEST_RV64U \ + .macro init; \ + .endm + +#define RVTEST_RV64UF \ + .macro init; \ + RVTEST_FP_ENABLE; \ + .endm + +#define RVTEST_RV64UV \ + .macro init; \ + RVTEST_VECTOR_ENABLE; \ + .endm + +#define RVTEST_RV32U \ + .macro init; \ + .endm + +#define RVTEST_RV32UF \ + .macro init; \ + RVTEST_FP_ENABLE; \ + .endm + +#define RVTEST_RV32UV \ + .macro init; \ + RVTEST_VECTOR_ENABLE; \ + .endm + +#define RVTEST_RV64M \ + .macro init; \ + RVTEST_ENABLE_MACHINE; \ + .endm + +#define RVTEST_RV64S \ + .macro init; \ + RVTEST_ENABLE_SUPERVISOR; \ + .endm + +#define RVTEST_RV32M \ + .macro init; \ + RVTEST_ENABLE_MACHINE; \ + .endm + +#define RVTEST_RV32S \ + .macro init; \ + RVTEST_ENABLE_SUPERVISOR; \ + .endm + +#if __riscv_xlen == 64 +# define CHECK_XLEN li a0, 1; slli a0, a0, 31; bgez a0, 1f; RVTEST_PASS; 1: +#else +# define CHECK_XLEN li a0, 1; slli a0, a0, 31; bltz a0, 1f; RVTEST_PASS; 1: +#endif + +#define INIT_XREG \ + li x1, 0; \ + li x2, 0; \ + li x3, 0; \ + li x4, 0; \ + li x5, 0; \ + li x6, 0; \ + li x7, 0; \ + li x8, 0; \ + li x9, 0; \ + li x10, 0; \ + li x11, 0; \ + li x12, 0; \ + li x13, 0; \ + li x14, 0; \ + li x15, 0; \ + li x16, 0; \ + li x17, 0; \ + li x18, 0; \ + li x19, 0; \ + li x20, 0; \ + li x21, 0; \ + li x22, 0; \ + li x23, 0; \ + li x24, 0; \ + li x25, 0; \ + li x26, 0; \ + li x27, 0; \ + li x28, 0; \ + li x29, 0; \ + li x30, 0; \ + li x31, 0; + +#define INIT_PMP \ + la t0, 1f; \ + csrw mtvec, t0; \ + /* Set up a PMP to permit all accesses */ \ + li t0, (1 << (31 + (__riscv_xlen / 64) * (53 - 31))) - 1; \ + csrw pmpaddr0, t0; \ + li t0, PMP_NAPOT | PMP_R | PMP_W | PMP_X; \ + csrw pmpcfg0, t0; \ + .align 2; \ +1: + +#define INIT_RNMI \ + la t0, 1f; \ + csrw mtvec, t0; \ + csrwi CSR_MNSTATUS, MNSTATUS_NMIE; \ + .align 2; \ +1: + +#define INIT_SATP \ + la t0, 1f; \ + csrw mtvec, t0; \ + csrwi satp, 0; \ + .align 2; \ +1: + +#define DELEGATE_NO_TRAPS \ + csrwi mie, 0; \ + la t0, 1f; \ + csrw mtvec, t0; \ + csrwi medeleg, 0; \ + csrwi mideleg, 0; \ + .align 2; \ +1: + +#define RVTEST_ENABLE_SUPERVISOR \ + li a0, MSTATUS_MPP & (MSTATUS_MPP >> 1); \ + csrs mstatus, a0; \ + li a0, SIP_SSIP | SIP_STIP; \ + csrs mideleg, a0; \ + +#define RVTEST_ENABLE_MACHINE \ + li a0, MSTATUS_MPP; \ + csrs mstatus, a0; \ + +#define RVTEST_FP_ENABLE \ + li a0, MSTATUS_FS & (MSTATUS_FS >> 1); \ + csrs mstatus, a0; \ + csrwi fcsr, 0 + +#define RVTEST_VECTOR_ENABLE \ + li a0, (MSTATUS_VS & (MSTATUS_VS >> 1)) | \ + (MSTATUS_FS & (MSTATUS_FS >> 1)); \ + csrs mstatus, a0; \ + csrwi fcsr, 0; \ + csrwi vcsr, 0; + +#define RISCV_MULTICORE_DISABLE \ + csrr a0, mhartid; \ + 1: bnez a0, 1b + +#define EXTRA_TVEC_USER +#define EXTRA_TVEC_MACHINE +#define EXTRA_INIT +#define EXTRA_INIT_TIMER +#define FILTER_TRAP +#define FILTER_PAGE_FAULT + +#define INTERRUPT_HANDLER j other_exception /* No interrupts should occur */ + +#define RVTEST_CODE_BEGIN \ + .section .text.init; \ + .align 6; \ + .weak stvec_handler; \ + 
.weak mtvec_handler; \ + .globl _start; \ +_start: \ + /* reset vector */ \ + j reset_vector; \ + .align 2; \ +trap_vector: \ + /* test whether the test came from pass/fail */ \ + csrr t5, mcause; \ + li t6, CAUSE_USER_ECALL; \ + beq t5, t6, write_tohost; \ + li t6, CAUSE_SUPERVISOR_ECALL; \ + beq t5, t6, write_tohost; \ + li t6, CAUSE_MACHINE_ECALL; \ + beq t5, t6, write_tohost; \ + /* if an mtvec_handler is defined, jump to it */ \ + la t5, mtvec_handler; \ + beqz t5, 1f; \ + jr t5; \ + /* was it an interrupt or an exception? */ \ + 1: csrr t5, mcause; \ + bgez t5, handle_exception; \ + INTERRUPT_HANDLER; \ +handle_exception: \ + /* we don't know how to handle whatever the exception was */ \ + other_exception: \ + /* some unhandlable exception occurred */ \ + 1: ori TESTNUM, TESTNUM, 1337; \ + write_tohost: \ + sw TESTNUM, tohost, t5; \ + sw zero, tohost + 4, t5; \ + j write_tohost; \ +reset_vector: \ + INIT_XREG; \ + RISCV_MULTICORE_DISABLE; \ + INIT_RNMI; \ + INIT_SATP; \ + INIT_PMP; \ + DELEGATE_NO_TRAPS; \ + li TESTNUM, 0; \ + la t0, trap_vector; \ + csrw mtvec, t0; \ + CHECK_XLEN; \ + /* if an stvec_handler is defined, delegate exceptions to it */ \ + la t0, stvec_handler; \ + beqz t0, 1f; \ + csrw stvec, t0; \ + li t0, (1 << CAUSE_LOAD_PAGE_FAULT) | \ + (1 << CAUSE_STORE_PAGE_FAULT) | \ + (1 << CAUSE_FETCH_PAGE_FAULT) | \ + (1 << CAUSE_MISALIGNED_FETCH) | \ + (1 << CAUSE_USER_ECALL) | \ + (1 << CAUSE_BREAKPOINT); \ + csrw medeleg, t0; \ +1: csrwi mstatus, 0; \ + init; \ + EXTRA_INIT; \ + EXTRA_INIT_TIMER; \ + la t0, 1f; \ + csrw mepc, t0; \ + csrr a0, mhartid; \ + mret; \ +1: + +//----------------------------------------------------------------------- +// End Macro +//----------------------------------------------------------------------- + +#define EXIT_POS 0x40000000; +#define EXIT_CODE 0xdeadbeef; +#define RVTEST_CODE_END \ + li x1, EXIT_POS; \ + li x2, EXIT_CODE; \ + sw x2, 0(x1); + +//----------------------------------------------------------------------- +// Pass/Fail Macro +//----------------------------------------------------------------------- + +#define RVTEST_PASS \ + fence; \ + li TESTNUM, 1; \ + li a7, 93; \ + li a0, 0; \ + +#define TESTNUM gp +#define RVTEST_FAIL \ + fence; \ +1: beqz TESTNUM, 1b; \ + sll TESTNUM, TESTNUM, 1; \ + or TESTNUM, TESTNUM, 1; \ + li a7, 93; \ + addi a0, TESTNUM, 0; \ + +//----------------------------------------------------------------------- +// Data Section Macro +//----------------------------------------------------------------------- + +#define EXTRA_DATA + +#define RVTEST_DATA_BEGIN \ + EXTRA_DATA \ + .pushsection .tohost,"aw",@progbits; \ + .align 6; .global tohost; tohost: .dword 0; .size tohost, 8; \ + .align 6; .global fromhost; fromhost: .dword 0; .size fromhost, 8;\ + .popsection; \ + .align 4; .global begin_signature; begin_signature: + +#define RVTEST_DATA_END .align 4; .global end_signature; end_signature: + +#endif diff --git a/tests/riscv-test-env/pm/link.ld b/tests/riscv-test-env/pm/link.ld new file mode 120000 index 000000000..86b45f9f6 --- /dev/null +++ b/tests/riscv-test-env/pm/link.ld @@ -0,0 +1 @@ +../p/link.ld \ No newline at end of file diff --git a/tests/riscv-test-env/pm/riscv_test.h b/tests/riscv-test-env/pm/riscv_test.h new file mode 100644 index 000000000..38a0e86b8 --- /dev/null +++ b/tests/riscv-test-env/pm/riscv_test.h @@ -0,0 +1,11 @@ +// See LICENSE for license details. 
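+// The "pm" (physical, multi-core) environment below is the "p" environment with the hart-0 gate removed: RISCV_MULTICORE_DISABLE is redefined to expand to nothing, so secondary harts run the test instead of spinning at the reset gate.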
+ +#ifndef _ENV_PHYSICAL_MULTI_CORE_H +#define _ENV_PHYSICAL_MULTI_CORE_H + +#include "../p/riscv_test.h" + +#undef RISCV_MULTICORE_DISABLE +#define RISCV_MULTICORE_DISABLE + +#endif diff --git a/tests/riscv-test-env/pt/link.ld b/tests/riscv-test-env/pt/link.ld new file mode 120000 index 000000000..86b45f9f6 --- /dev/null +++ b/tests/riscv-test-env/pt/link.ld @@ -0,0 +1 @@ +../p/link.ld \ No newline at end of file diff --git a/tests/riscv-test-env/pt/riscv_test.h b/tests/riscv-test-env/pt/riscv_test.h new file mode 100644 index 000000000..34c2a331a --- /dev/null +++ b/tests/riscv-test-env/pt/riscv_test.h @@ -0,0 +1,69 @@ +// See LICENSE for license details. + +#ifndef _ENV_PHYSICAL_SINGLE_CORE_TIMER_H +#define _ENV_PHYSICAL_SINGLE_CORE_TIMER_H + +#include "../p/riscv_test.h" + +#define TIMER_INTERVAL 2 + +#undef EXTRA_INIT_TIMER +#define EXTRA_INIT_TIMER \ + li a0, MIP_MTIP; \ + csrs mie, a0; \ + csrr a0, mtime; \ + addi a0, a0, TIMER_INTERVAL; \ + csrw mtimecmp, a0; \ + +#if SSTATUS_XS != 0x18000 +# error +#endif +#define XS_SHIFT 15 + +#undef INTERRUPT_HANDLER +#define INTERRUPT_HANDLER \ + slli t5, t5, 1; \ + srli t5, t5, 1; \ + add t5, t5, -IRQ_M_TIMER; \ + bnez t5, other_exception; /* other interrupts shouldn't happen */\ + csrr t5, mtime; \ + addi t5, t5, TIMER_INTERVAL; \ + csrw mtimecmp, t5; \ + mret; \ + +//----------------------------------------------------------------------- +// Data Section Macro +//----------------------------------------------------------------------- + +#undef EXTRA_DATA +#define EXTRA_DATA \ + .align 3; \ +regspill: \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ +evac: \ + .skip 32768; \ + +#endif diff --git a/tests/riscv-test-env/v/entry.S b/tests/riscv-test-env/v/entry.S new file mode 100644 index 000000000..13d46a349 --- /dev/null +++ b/tests/riscv-test-env/v/entry.S @@ -0,0 +1,164 @@ +#include "riscv_test.h" + +#if __riscv_xlen == 64 +# define STORE sd +# define LOAD ld +# define REGBYTES 8 +#else +# define STORE sw +# define LOAD lw +# define REGBYTES 4 +#endif + +#define STACK_TOP (_end + RISCV_PGSIZE * 4) + + .section ".text.init","ax",@progbits + .globl _start + .align 2 +_start: + j handle_reset + + /* NMI vector */ + .align 2 +nmi_vector: + j wtf + + .align 2 +trap_vector: + j wtf + +handle_reset: + li x1, 0 + li x2, 0 + li x3, 0 + li x4, 0 + li x5, 0 + li x6, 0 + li x7, 0 + li x8, 0 + li x9, 0 + li x10, 0 + li x11, 0 + li x12, 0 + li x13, 0 + li x14, 0 + li x15, 0 + li x16, 0 + li x17, 0 + li x18, 0 + li x19, 0 + li x20, 0 + li x21, 0 + li x22, 0 + li x23, 0 + li x24, 0 + li x25, 0 + li x26, 0 + li x27, 0 + li x28, 0 + li x29, 0 + li x30, 0 + li x31, 0 + + INIT_RNMI + + la t0, trap_vector + csrw mtvec, t0 + la sp, STACK_TOP - SIZEOF_TRAPFRAME_T + csrr t0, mhartid + slli t0, t0, 12 + add sp, sp, t0 + csrw mscratch, sp + call extra_boot + la a0, userstart + j 
vm_boot + + .globl pop_tf +pop_tf: + LOAD t0,33*REGBYTES(a0) + csrw sepc,t0 + LOAD x1,1*REGBYTES(a0) + LOAD x2,2*REGBYTES(a0) + LOAD x3,3*REGBYTES(a0) + LOAD x4,4*REGBYTES(a0) + LOAD x5,5*REGBYTES(a0) + LOAD x6,6*REGBYTES(a0) + LOAD x7,7*REGBYTES(a0) + LOAD x8,8*REGBYTES(a0) + LOAD x9,9*REGBYTES(a0) + LOAD x11,11*REGBYTES(a0) + LOAD x12,12*REGBYTES(a0) + LOAD x13,13*REGBYTES(a0) + LOAD x14,14*REGBYTES(a0) + LOAD x15,15*REGBYTES(a0) + LOAD x16,16*REGBYTES(a0) + LOAD x17,17*REGBYTES(a0) + LOAD x18,18*REGBYTES(a0) + LOAD x19,19*REGBYTES(a0) + LOAD x20,20*REGBYTES(a0) + LOAD x21,21*REGBYTES(a0) + LOAD x22,22*REGBYTES(a0) + LOAD x23,23*REGBYTES(a0) + LOAD x24,24*REGBYTES(a0) + LOAD x25,25*REGBYTES(a0) + LOAD x26,26*REGBYTES(a0) + LOAD x27,27*REGBYTES(a0) + LOAD x28,28*REGBYTES(a0) + LOAD x29,29*REGBYTES(a0) + LOAD x30,30*REGBYTES(a0) + LOAD x31,31*REGBYTES(a0) + LOAD a0,10*REGBYTES(a0) + sret + + .global trap_entry + .align 2 +trap_entry: + csrrw sp, sscratch, sp + + # save gprs + STORE x1,1*REGBYTES(sp) + STORE x3,3*REGBYTES(sp) + STORE x4,4*REGBYTES(sp) + STORE x5,5*REGBYTES(sp) + STORE x6,6*REGBYTES(sp) + STORE x7,7*REGBYTES(sp) + STORE x8,8*REGBYTES(sp) + STORE x9,9*REGBYTES(sp) + STORE x10,10*REGBYTES(sp) + STORE x11,11*REGBYTES(sp) + STORE x12,12*REGBYTES(sp) + STORE x13,13*REGBYTES(sp) + STORE x14,14*REGBYTES(sp) + STORE x15,15*REGBYTES(sp) + STORE x16,16*REGBYTES(sp) + STORE x17,17*REGBYTES(sp) + STORE x18,18*REGBYTES(sp) + STORE x19,19*REGBYTES(sp) + STORE x20,20*REGBYTES(sp) + STORE x21,21*REGBYTES(sp) + STORE x22,22*REGBYTES(sp) + STORE x23,23*REGBYTES(sp) + STORE x24,24*REGBYTES(sp) + STORE x25,25*REGBYTES(sp) + STORE x26,26*REGBYTES(sp) + STORE x27,27*REGBYTES(sp) + STORE x28,28*REGBYTES(sp) + STORE x29,29*REGBYTES(sp) + STORE x30,30*REGBYTES(sp) + STORE x31,31*REGBYTES(sp) + + csrrw t0,sscratch,sp + STORE t0,2*REGBYTES(sp) + + # get sr, epc, badvaddr, cause + csrr t0,sstatus + STORE t0,32*REGBYTES(sp) + csrr t0,sepc + STORE t0,33*REGBYTES(sp) + csrr t0,stval + STORE t0,34*REGBYTES(sp) + csrr t0,scause + STORE t0,35*REGBYTES(sp) + + move a0, sp + j handle_trap diff --git a/tests/riscv-test-env/v/link.ld b/tests/riscv-test-env/v/link.ld new file mode 120000 index 000000000..86b45f9f6 --- /dev/null +++ b/tests/riscv-test-env/v/link.ld @@ -0,0 +1 @@ +../p/link.ld \ No newline at end of file diff --git a/tests/riscv-test-env/v/riscv_test.h b/tests/riscv-test-env/v/riscv_test.h new file mode 100644 index 000000000..f56c0228c --- /dev/null +++ b/tests/riscv-test-env/v/riscv_test.h @@ -0,0 +1,94 @@ +// See LICENSE for license details. 
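+// The "v" (virtual) environment reuses the "p" macros but runs the test body in user mode on top of the supervisor in vm.c: RVTEST_CODE_BEGIN now emits the user entry point and the extra_boot/trap_filter/pf_filter hooks, pass/fail is reported through an ecall ("scall"), and the LFSR and trapframe definitions below are shared with entry.S and vm.c.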
+ +#ifndef _ENV_VIRTUAL_SINGLE_CORE_H +#define _ENV_VIRTUAL_SINGLE_CORE_H + +#include "../p/riscv_test.h" + +//----------------------------------------------------------------------- +// Begin Macro +//----------------------------------------------------------------------- + +#undef RVTEST_FP_ENABLE +#define RVTEST_FP_ENABLE fssr x0 + +#undef RVTEST_VECTOR_ENABLE +#define RVTEST_VECTOR_ENABLE \ + csrwi fcsr, 0; \ + csrwi vcsr, 0; + +#undef RVTEST_CODE_BEGIN +#define RVTEST_CODE_BEGIN \ + .text; \ + .global extra_boot; \ +extra_boot: \ + EXTRA_INIT \ + ret; \ +.global trap_filter; \ +trap_filter: \ + FILTER_TRAP \ + li a0, 0; \ + ret; \ +.global pf_filter; \ +pf_filter: \ + FILTER_PAGE_FAULT \ + li a0, 0; \ + ret; \ + .global userstart; \ +userstart: \ + init + +//----------------------------------------------------------------------- +// Pass/Fail Macro +//----------------------------------------------------------------------- + +#undef RVTEST_PASS +#define RVTEST_PASS li a0, 1; scall + +#undef RVTEST_FAIL +#define RVTEST_FAIL sll a0, TESTNUM, 1; 1:beqz a0, 1b; or a0, a0, 1; scall; + +//----------------------------------------------------------------------- +// Data Section Macro +//----------------------------------------------------------------------- + +#undef RVTEST_DATA_END +#define RVTEST_DATA_END + +//----------------------------------------------------------------------- +// Supervisor mode definitions and macros +//----------------------------------------------------------------------- + +#ifndef LFSR_BITS +#define LFSR_BITS 6 +#endif + +#define MAX_TEST_PAGES ((1 << LFSR_BITS)-1) // this must be the period of the LFSR below +#define LFSR_NEXT(x) (((((x)^((x)>>1)) & 1) << (LFSR_BITS-1)) | ((x) >> 1)) + +#define PGSHIFT 12 +#define PGSIZE (1UL << PGSHIFT) + +#define SIZEOF_TRAPFRAME_T ((__riscv_xlen / 8) * 36) + +#ifndef __ASSEMBLER__ + +typedef unsigned long pte_t; +#define LEVELS (sizeof(pte_t) == sizeof(uint64_t) ? 3 : 2) +#define PTIDXBITS (PGSHIFT - (sizeof(pte_t) == 8 ? 
3 : 2)) +#define VPN_BITS (PTIDXBITS * LEVELS) +#define VA_BITS (VPN_BITS + PGSHIFT) +#define PTES_PER_PT (1UL << RISCV_PGLEVEL_BITS) +#define MEGAPAGE_SIZE (PTES_PER_PT * PGSIZE) + +typedef struct +{ + long gpr[32]; + long sr; + long epc; + long badvaddr; + long cause; +} trapframe_t; +#endif + +#endif diff --git a/tests/riscv-test-env/v/string.c b/tests/riscv-test-env/v/string.c new file mode 100644 index 000000000..4ffedc0a4 --- /dev/null +++ b/tests/riscv-test-env/v/string.c @@ -0,0 +1,114 @@ +#include <stdint.h> +#include <stddef.h> +#include <string.h> + +void* memcpy(void* dest, const void* src, size_t len) +{ + if ((((uintptr_t)dest | (uintptr_t)src | len) & (sizeof(uintptr_t)-1)) == 0) { + const uintptr_t* s = src; + uintptr_t *d = dest; + while (d < (uintptr_t*)(dest + len)) + *d++ = *s++; + } else { + const char* s = src; + char *d = dest; + while (d < (char*)(dest + len)) + *d++ = *s++; + } + return dest; +} + +void* memset(void* dest, int byte, size_t len) +{ + if ((((uintptr_t)dest | len) & (sizeof(uintptr_t)-1)) == 0) { + uintptr_t word = byte & 0xFF; + word |= word << 8; + word |= word << 16; + word |= word << 16 << 16; + + uintptr_t *d = dest; + while (d < (uintptr_t*)(dest + len)) + *d++ = word; + } else { + char *d = dest; + while (d < (char*)(dest + len)) + *d++ = byte; + } + return dest; +} + +size_t strlen(const char *s) +{ + const char *p = s; + while (*p) + p++; + return p - s; +} + +int strcmp(const char* s1, const char* s2) +{ + unsigned char c1, c2; + + do { + c1 = *s1++; + c2 = *s2++; + } while (c1 != 0 && c1 == c2); + + return c1 - c2; +} + +int memcmp(const void* s1, const void* s2, size_t n) +{ + if ((((uintptr_t)s1 | (uintptr_t)s2) & (sizeof(uintptr_t)-1)) == 0) { + const uintptr_t* u1 = s1; + const uintptr_t* u2 = s2; + const uintptr_t* end = u1 + (n / sizeof(uintptr_t)); + while (u1 < end) { + if (*u1 != *u2) + break; + u1++; + u2++; + } + n -= (const void*)u1 - s1; + s1 = u1; + s2 = u2; + } + + while (n--) { + unsigned char c1 = *(const unsigned char*)s1++; + unsigned char c2 = *(const unsigned char*)s2++; + if (c1 != c2) + return c1 - c2; + } + + return 0; +} + +char* strcpy(char* dest, const char* src) +{ + char* d = dest; + while ((*d++ = *src++)) + ; + return dest; +} + +long atol(const char* str) +{ + long res = 0; + int sign = 0; + + while (*str == ' ') + str++; + + if (*str == '-' || *str == '+') { + sign = *str == '-'; + str++; + } + + while (*str) { + res *= 10; + res += *str++ - '0'; + } + + return sign ? -res : res; +} diff --git a/tests/riscv-test-env/v/vm.c b/tests/riscv-test-env/v/vm.c new file mode 100644 index 000000000..178d90ba3 --- /dev/null +++ b/tests/riscv-test-env/v/vm.c @@ -0,0 +1,315 @@ +// See LICENSE for license details. 
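+// vm.c is the supervisor half of the "v" environment: vm_boot builds the page tables for the chosen SATP mode (Sv32/Sv39/Sv48) and drops into the test in user mode via pop_tf, while handle_trap and handle_fault demand-page the MAX_TEST_PAGES user pages out of an LFSR-shuffled free list; evict checks the PTE A/D bits and writes dirty pages back before a frame is reused.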
+ +#include <stdint.h> +#include <stddef.h> +#include <string.h> + +#include "riscv_test.h" + +#if __riscv_xlen == 32 +# define SATP_MODE_CHOICE SATP_MODE_SV32 +#elif defined(Sv48) +# define SATP_MODE_CHOICE SATP_MODE_SV48 +#else +# define SATP_MODE_CHOICE SATP_MODE_SV39 +#endif + +void trap_entry(); +void pop_tf(trapframe_t*); + +extern volatile uint64_t tohost; +extern volatile uint64_t fromhost; + +static void do_tohost(uint64_t tohost_value) +{ + while (tohost) + fromhost = 0; + tohost = tohost_value; +} + +#define pa2kva(pa) ((void*)(pa) - DRAM_BASE - MEGAPAGE_SIZE) +#define uva2kva(pa) ((void*)(pa) - MEGAPAGE_SIZE) + +#define flush_page(addr) asm volatile ("sfence.vma %0" : : "r" (addr) : "memory") + +static uint64_t lfsr63(uint64_t x) +{ + uint64_t bit = (x ^ (x >> 1)) & 1; + return (x >> 1) | (bit << 62); +} + +static void cputchar(int x) +{ + do_tohost(0x0101000000000000 | (unsigned char)x); +} + +static void cputstring(const char* s) +{ + while (*s) + cputchar(*s++); +} + +static void terminate(int code) +{ + do_tohost(code); + while (1); +} + +void wtf() +{ + terminate(841); +} + +#define stringify1(x) #x +#define stringify(x) stringify1(x) +#define assert(x) do { \ + if (x) break; \ + cputstring("Assertion failed: " stringify(x) "\n"); \ + terminate(3); \ +} while(0) + +#define l1pt pt[0] +#define user_l2pt pt[1] +#if SATP_MODE_CHOICE == SATP_MODE_SV48 +# define NPT 6 +# define kernel_l2pt pt[2] +# define kernel_l3pt pt[3] +# define user_l3pt pt[4] +# define user_llpt pt[5] +#elif SATP_MODE_CHOICE == SATP_MODE_SV39 +# define NPT 4 +# define kernel_l2pt pt[2] +# define user_llpt pt[3] +#elif SATP_MODE_CHOICE == SATP_MODE_SV32 +# define NPT 2 +# define user_llpt user_l2pt +#else +# error Unknown SATP_MODE_CHOICE +#endif +pte_t pt[NPT][PTES_PER_PT] __attribute__((aligned(PGSIZE))); + +typedef struct { pte_t addr; void* next; } freelist_t; + +freelist_t user_mapping[MAX_TEST_PAGES]; +freelist_t freelist_nodes[MAX_TEST_PAGES]; +freelist_t *freelist_head, *freelist_tail; + +void printhex(uint64_t x) +{ + char str[17]; + for (int i = 0; i < 16; i++) + { + str[15-i] = (x & 0xF) + ((x & 0xF) < 10 ? 
'0' : 'a'-10); + x >>= 4; + } + str[16] = 0; + + cputstring(str); +} + +static void evict(unsigned long addr) +{ + assert(addr >= PGSIZE && addr < MAX_TEST_PAGES * PGSIZE); + addr = addr/PGSIZE*PGSIZE; + + freelist_t* node = &user_mapping[addr/PGSIZE]; + if (node->addr) + { + // check accessed and dirty bits + assert(user_llpt[addr/PGSIZE] & PTE_A); + uintptr_t sstatus = set_csr(sstatus, SSTATUS_SUM); + if (memcmp((void*)addr, uva2kva(addr), PGSIZE)) { + assert(user_llpt[addr/PGSIZE] & PTE_D); + memcpy(uva2kva(addr), (void*)addr, PGSIZE); + } + write_csr(sstatus, sstatus); + + user_mapping[addr/PGSIZE].addr = 0; + + if (freelist_tail == 0) + freelist_head = freelist_tail = node; + else + { + freelist_tail->next = node; + freelist_tail = node; + } + } +} + +extern int pf_filter(uintptr_t addr, uintptr_t *pte, int *copy); +extern int trap_filter(trapframe_t *tf); + +void handle_fault(uintptr_t addr, uintptr_t cause) +{ + uintptr_t filter_encodings = 0; + int copy_page = 1; + + assert(addr >= PGSIZE && addr < MAX_TEST_PAGES * PGSIZE); + addr = addr/PGSIZE*PGSIZE; + + if (user_llpt[addr/PGSIZE]) { + if (!(user_llpt[addr/PGSIZE] & PTE_A)) { + user_llpt[addr/PGSIZE] |= PTE_A; + } else { + assert(!(user_llpt[addr/PGSIZE] & PTE_D) && cause == CAUSE_STORE_PAGE_FAULT); + user_llpt[addr/PGSIZE] |= PTE_D; + } + flush_page(addr); + return; + } + + freelist_t* node = freelist_head; + assert(node); + freelist_head = node->next; + if (freelist_head == freelist_tail) + freelist_tail = 0; + + uintptr_t new_pte = (node->addr >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V | PTE_U | PTE_R | PTE_W | PTE_X; + + if (pf_filter(addr, &filter_encodings, &copy_page)) { + new_pte = (node->addr >> PGSHIFT << PTE_PPN_SHIFT) | filter_encodings; + } + + user_llpt[addr/PGSIZE] = new_pte | PTE_A | PTE_D; + flush_page(addr); + + assert(user_mapping[addr/PGSIZE].addr == 0); + user_mapping[addr/PGSIZE] = *node; + + uintptr_t sstatus = set_csr(sstatus, SSTATUS_SUM); + memcpy((void*)addr, uva2kva(addr), PGSIZE); + write_csr(sstatus, sstatus); + + user_llpt[addr/PGSIZE] = new_pte; + flush_page(addr); + + asm volatile ("fence.i"); +} + +void handle_trap(trapframe_t* tf) +{ + if (trap_filter(tf)) { + pop_tf(tf); + } + + if (tf->cause == CAUSE_USER_ECALL) + { + int n = tf->gpr[10]; + + for (long i = 1; i < MAX_TEST_PAGES; i++) + evict(i*PGSIZE); + + terminate(n); + } + else if (tf->cause == CAUSE_ILLEGAL_INSTRUCTION) + { + assert(tf->epc % 4 == 0); + + int* fssr; + asm ("jal %0, 1f; fssr x0; 1:" : "=r"(fssr)); + + if (*(int*)tf->epc == *fssr) + terminate(1); // FP test on non-FP hardware. "succeed." 
+ else + assert(!"illegal instruction"); + tf->epc += 4; + } + else if (tf->cause == CAUSE_FETCH_PAGE_FAULT || tf->cause == CAUSE_LOAD_PAGE_FAULT || tf->cause == CAUSE_STORE_PAGE_FAULT) + handle_fault(tf->badvaddr, tf->cause); + else + assert(!"unexpected exception"); + + pop_tf(tf); +} + +static void coherence_torture() +{ + // cause coherence misses without affecting program semantics + uint64_t random = ENTROPY; + while (1) { + uintptr_t paddr = DRAM_BASE + ((random % (2 * (MAX_TEST_PAGES + 1) * PGSIZE)) & -4); +#ifdef __riscv_atomic + if (random & 1) // perform a no-op write + asm volatile ("amoadd.w zero, zero, (%0)" :: "r"(paddr)); + else // perform a read +#endif + asm volatile ("lw zero, (%0)" :: "r"(paddr)); + random = lfsr63(random); + } +} + +void vm_boot(uintptr_t test_addr) +{ + uint64_t random = ENTROPY; + if (read_csr(mhartid) > 0) + coherence_torture(); + + _Static_assert(SIZEOF_TRAPFRAME_T == sizeof(trapframe_t), "???"); + +#if (MAX_TEST_PAGES > PTES_PER_PT) || (DRAM_BASE % MEGAPAGE_SIZE) != 0 +# error +#endif + // map user to lowermost megapage + l1pt[0] = ((pte_t)user_l2pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + // map kernel to uppermost megapage +#if SATP_MODE_CHOICE == SATP_MODE_SV48 + l1pt[PTES_PER_PT-1] = ((pte_t)kernel_l2pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + kernel_l2pt[PTES_PER_PT-1] = ((pte_t)kernel_l3pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + kernel_l3pt[PTES_PER_PT-1] = (DRAM_BASE/RISCV_PGSIZE << PTE_PPN_SHIFT) | PTE_V | PTE_R | PTE_W | PTE_X | PTE_A | PTE_D; + user_l2pt[0] = ((pte_t)user_l3pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + user_l3pt[0] = ((pte_t)user_llpt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; +#elif SATP_MODE_CHOICE == SATP_MODE_SV39 + l1pt[PTES_PER_PT-1] = ((pte_t)kernel_l2pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + kernel_l2pt[PTES_PER_PT-1] = (DRAM_BASE/RISCV_PGSIZE << PTE_PPN_SHIFT) | PTE_V | PTE_R | PTE_W | PTE_X | PTE_A | PTE_D; + user_l2pt[0] = ((pte_t)user_llpt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; +#elif SATP_MODE_CHOICE == SATP_MODE_SV32 + l1pt[PTES_PER_PT-1] = (DRAM_BASE/RISCV_PGSIZE << PTE_PPN_SHIFT) | PTE_V | PTE_R | PTE_W | PTE_X | PTE_A | PTE_D; +#else +# error +#endif + uintptr_t vm_choice = SATP_MODE_CHOICE; + uintptr_t satp_value = ((uintptr_t)l1pt >> PGSHIFT) + | (vm_choice * (SATP_MODE & ~(SATP_MODE<<1))); + write_csr(satp, satp_value); + if (read_csr(satp) != satp_value) + assert(!"unsupported satp mode"); + + // Set up PMPs if present, ignoring illegal instruction trap if not. + uintptr_t pmpc = PMP_NAPOT | PMP_R | PMP_W | PMP_X; + uintptr_t pmpa = ((uintptr_t)1 << (__riscv_xlen == 32 ? 
31 : 53)) - 1; + asm volatile ("la t0, 1f\n\t" + "csrrw t0, mtvec, t0\n\t" + "csrw pmpaddr0, %1\n\t" + "csrw pmpcfg0, %0\n\t" + ".align 2\n\t" + "1: csrw mtvec, t0" + : : "r" (pmpc), "r" (pmpa) : "t0"); + + // set up supervisor trap handling + write_csr(stvec, pa2kva(trap_entry)); + write_csr(sscratch, pa2kva(read_csr(mscratch))); + write_csr(medeleg, + (1 << CAUSE_USER_ECALL) | + (1 << CAUSE_FETCH_PAGE_FAULT) | + (1 << CAUSE_LOAD_PAGE_FAULT) | + (1 << CAUSE_STORE_PAGE_FAULT)); + // FPU on; accelerator on; vector unit on + write_csr(mstatus, MSTATUS_FS | MSTATUS_XS | MSTATUS_VS); + write_csr(mie, 0); + + random = 1 + (random % MAX_TEST_PAGES); + freelist_head = pa2kva((void*)&freelist_nodes[0]); + freelist_tail = pa2kva(&freelist_nodes[MAX_TEST_PAGES-1]); + for (long i = 0; i < MAX_TEST_PAGES; i++) + { + freelist_nodes[i].addr = DRAM_BASE + (MAX_TEST_PAGES + random)*PGSIZE; + freelist_nodes[i].next = pa2kva(&freelist_nodes[i+1]); + random = LFSR_NEXT(random); + } + freelist_nodes[MAX_TEST_PAGES-1].next = 0; + + trapframe_t tf; + memset(&tf, 0, sizeof(tf)); + tf.epc = test_addr - DRAM_BASE; + pop_tf(&tf); +} diff --git a/tests/rvv_bench/_include/bench.h b/tests/rvv_bench/_include/bench.h deleted file mode 100644 index 126346d4a..000000000 --- a/tests/rvv_bench/_include/bench.h +++ /dev/null @@ -1,170 +0,0 @@ -#include "config.h" -#include "nolibc.h" - -#ifndef BENCH_NEXT - #define BENCH_NEXT NEXT -#endif - -#define MX(f, F) f(F##_m1) f(F##_m2) f(F##_m4) f(F##_m8) -#define STR(x) STR_(x) -#define STR_(x) #x - -#define ROTL(x, n) (((x) << (n)) | ((x) >> (8 * sizeof(x) - (n)))) - -#if defined(__clang__) || defined(__GNUC__) || defined(__INTEL_COMPILER) - -#define BENCH_CLOBBER() ({ __asm volatile("" ::: "memory"); }) -#define BENCH_VOLATILE(x) \ - ({ __asm volatile("" : "+g"(x) : "g"(x) : "memory"); }) -#define BENCH_VOLATILE_REG(x) \ - ({ __asm volatile("" : "+r"(x) : "r"(x) : "memory"); }) -#define BENCH_VOLATILE_MEM(x) \ - ({ __asm volatile("" : "+m"(x) : "m"(x) : "memory"); }) -#define BENCH_FENCE() ({ __asm volatile("fence.i"); }) - -#define BENCH_MAY_ALIAS __attribute__((__may_alias__)) - -#else - -#define BENCH_CLOBBER() -#define BENCH_CLOBBER_WITH(x) (bench__use_ptr(&(x)), BENCH_CLOBBER()) -#define BENCH_CLOBBER_WITH_REG(x) (bench__use_ptr(&(x)), BENCH_CLOBBER()) -#define BENCH_CLOBBER_WITH_MEM(x) (bench__use_ptr(&(x)), BENCH_CLOBBER()) -static void bench_use_ptr(char const volatile *x) {} - -#define BENCH_MAY_ALIAS - -#endif - -static int compare_ux(void const *a, void const *b) { - ux A = *(ux *)a, B = *(ux *)b; - return A < B ? -1 : A > B ? 
1 : 0; -} - -typedef struct { - ux x, y, z; -} RandState; -static RandState randState = {123, 456, 789}; - -/* RomuDuoJr, see https://romu-random.org/ */ -static ux urand(void) { - ux xp = randState.x, yp = randState.y, zp = randState.z; - randState.x = 3323815723u * zp; - randState.y = ROTL(yp - xp, 6); - randState.z = ROTL(zp - yp, 22); - return xp; -} - -typedef struct { - char const *name; - void *func; -} Impl; -typedef struct { - size_t N; - char const *name; - ux (*func)(void *, size_t); -} Bench; - -static unsigned char *mem = 0; - -void bench_main(void); -ux checksum(size_t n); -void init(void); - -static void memrand(void *ptr, size_t n) { - unsigned char *p = ptr; -#ifdef __GNUC__ - typedef ux __attribute__((__may_alias__)) uxa; - for (; n && (uintptr_t)p % sizeof(uxa); --n) - *p++ = urand(); - uxa *px = (uxa *)p; - for (; n > sizeof(ux); n -= sizeof(ux)) - *px++ = urand(); - p = (unsigned char *)px; -#endif - while (n--) - *p++ = urand(); -} - -#if __STDC_HOSTED__ -#include -#else -static ux heap[1 + MAX_MEM / sizeof(ux)]; -#endif - -int test(void) { - -#if __STDC_HOSTED__ - mem = malloc(MAX_MEM); -#else - mem = (unsigned char *)heap; -#endif - - size_t x; - randState.x ^= rv_cycles() * 7; - randState.y += rv_cycles() ^ (uintptr_t)&x + 666 * (uintptr_t)mem; - - /* initialize memory */ - memrand(mem, MAX_MEM); - - init(); - bench_main(); -#if __STDC_HOSTED__ - free(mem); -#endif - return 0; -} - -static fx bench_time(size_t n, Impl impl, Bench bench) { - static ux arr[MAX_REPEATS]; - size_t total = 0, repeats = 0; - for (; repeats < MAX_REPEATS; ++repeats) { - total += arr[repeats] = bench.func(impl.func, n); - if (repeats > MIN_REPEATS && total > STOP_CYCLES) - break; - } -#if MAX_REPEATS > 4 - qsort(arr, repeats, sizeof *arr, compare_ux); - ux sum = 0, count = 0; - for (size_t i = repeats * 0.2f; i < repeats * 0.8f; ++i, ++count) - sum += arr[i]; -#else - ux sum = 0, count = repeats; - for (size_t i = 0; i < repeats; ++i) - sum += arr[i]; -#endif - return n / ((fx)sum / count); -} - -static void bench_run(size_t nImpls, Impl *impls, size_t nBenches, - Bench *benches) { - for (Bench *b = benches; b != benches + nBenches; ++b) { - size_t N = b->N; - for (Impl *i = impls; i != impls + nImpls; ++i) { - printf("["); - for (size_t n = 1; n < N; n = BENCH_NEXT(n)) { - ux si = 0, s0 = 0; - printf("%f, ", bench_time(n, *i, *b)); - } - printf("],\n"); - } - printf("]\n},\n"); - } -} - -#define TIME \ - for (ux beg = rv_cycles(), _once = 1; _once; \ - BENCH_FENCE(), _cycles += rv_cycles() - beg, _once = 0) - -#define BENCH(name) \ - ux bench_##name(void *_func, size_t n) { \ - Func *f = _func; \ - ux _cycles = 0; -#define BENCH_END \ - return _cycles; \ - } - -#define BENCH_MAIN(impls, benches) \ - void bench_main(void) { \ - bench_run(ARR_LEN(impls), impls, ARR_LEN(benches), benches); \ - } diff --git a/tests/rvv_bench/_include/config.h b/tests/rvv_bench/_include/config.h deleted file mode 100644 index 44f1009b0..000000000 --- a/tests/rvv_bench/_include/config.h +++ /dev/null @@ -1,25 +0,0 @@ -/* processor specific configs */ -#define HAS_E64 (__riscv_v_elen >= 64) -#define HAS_F16 0 - -/* the maximum number of bytes to allocate, minimum of 4096 */ -#define MAX_MEM (4096 * 8) -/* the byte count for the next run */ -#define NEXT(c) (c + c / 3 + 3) - -/* minimum number of repeats, to sample median from */ -#define MIN_REPEATS 1 -/* maxium number of repeats, executed until more than STOP_TIME has elapsed */ -#define MAX_REPEATS 1 - -/* stop repeats early afer this many cycles have elapsed 
*/ -#define STOP_CYCLES (1024 * 1024 * 500) - -/* custom scaling factors for benchmarks, these are used to make sure each - * benchmark approximately takes the same amount of time. */ - -#define SCALE_mandelbrot(N) ((N) / 10) -#define SCALE_mergelines(N) ((N) / 10) - -/* benchmark specific configurations */ -#define mandelbrot_ITER 100 diff --git a/tests/rvv_bench/_include/nolibc.h b/tests/rvv_bench/_include/nolibc.h deleted file mode 100644 index 88f31d136..000000000 --- a/tests/rvv_bench/_include/nolibc.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - -#if __riscv_xlen == 32 -typedef uint32_t ux; -typedef float fx; -#define IF64(...) -#elif __riscv_xlen == 64 -typedef uint64_t ux; -typedef double fx; -#define IF64(...) __VA_ARGS__ -#else -#error "unsupported XLEN" -#endif -#define ARR_LEN(x) (sizeof x / sizeof *(x)) - -static void memwrite(void const *ptr, size_t len) { - fwrite(ptr, 1, len, stdout); -} - -static size_t memread(void *ptr, size_t len) { - return fread(ptr, 1, len, stdin); -} - -static inline ux rv_cycles(void) { - ux cycle; - __asm volatile("csrr %0, mcycle" : "=r"(cycle)); - return cycle; -} - -static void memswap(void *a, void *b, size_t size) { - unsigned char *A = (unsigned char *)a, *B = (unsigned char *)b; - unsigned char *aEnd = A + size; - while (A < aEnd) { - unsigned char temp = *A; - *A++ = *B; - *B++ = temp; - } -} - -static ux usqrt(ux y) { - ux L = 0, R = y + 1; - while (L != R - 1) { - ux M = (L + R) / 2; - if (M * M <= y) - L = M; - else - R = M; - } - return L; -} - -static ux uhash(ux x) { -#if __riscv_xlen == 32 - /* MurmurHash3 32-bit finalizer */ - x ^= x >> 16; - x *= 0x85ebca6b; - x ^= x >> 13; - x *= 0xc2b2ae35; - x ^= x >> 16; -#else - /* splitmix64 finalizer */ - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9U; - x ^= x >> 27; - x *= 0x94d049bb133111ebU; - x ^= x >> 31; -#endif - return x; -} - -#define IFHOSTED(...) 
__VA_ARGS__ diff --git a/tests/rvv_bench/_include/template.S b/tests/rvv_bench/_include/template.S deleted file mode 100644 index eabdd5017..000000000 --- a/tests/rvv_bench/_include/template.S +++ /dev/null @@ -1,80 +0,0 @@ -#define HAS_RVV_1_0 1 -#include "config.h" -.text -.balign 8 - -#define CAT_(a,b) a##b -#define CAT(a,b) CAT_(a,b) - -#define STR(x) #x -#define STRe(x) STR(x) - -#define MX_N 0 -#include STRe(INC) - -#undef MX_N - -#define MX_N 1 -#define MX8(x) x##m8 -#define MX4(x) x##m4 -#define MX2(x) x##m2 -#define MX(x) x##m1 -#if HAS_RVV_1_0 -#define MXf2(x) x##mf2 -#define MXf4(x) x##mf4 -# define MXf8(x) x##mf8 -#endif -#include STRe(INC) - -#undef MX_N -#undef MX8 -#undef MX4 -#undef MX2 -#undef MX -#undef MXf2 -#undef MXf4 -#undef MXf8 - -#define MX_N 2 -#define MX4(x) x##m8 -#define MX2(x) x##m4 -#define MX(x) x##m2 -#define MXf2(x) x##m1 -#if HAS_RVV_1_0 -#define MXf4(x) x##mf2 -# define MXf8(x) x##mf4 -#endif -#include STRe(INC) - -#undef MX_N -#undef MX4 -#undef MX2 -#undef MX -#undef MXf2 -#undef MXf4 -#undef MXf8 - -#define MX_N 4 -#define MX2(x) x##m8 -#define MX(x) x##m4 -#define MXf2(x) x##m2 -#define MXf4(x) x##m1 -#if HAS_RVV_1_0 -# define MXf8(x) x##mf2 -#endif -#include STRe(INC) - -#undef MX_N -#undef MX2 -#undef MX -#undef MXf2 -#undef MXf4 -#undef MXf8 - -#define MX_N 8 -#define MX(x) x##m8 -#define MXf2(x) x##m4 -#define MXf4(x) x##m2 -#define MXf8(x) x##m1 -#include STRe(INC) - diff --git a/tests/rvv_bench/_include/thirdparty/boring.c b/tests/rvv_bench/_include/thirdparty/boring.c deleted file mode 100644 index e7cea237e..000000000 --- a/tests/rvv_bench/_include/thirdparty/boring.c +++ /dev/null @@ -1,383 +0,0 @@ -/* Copyright (c) 2014, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -// Adapted from the public domain, estream code by D. Bernstein. - -#include "boring.h" - - -extern void *memcpy(void *restrict dest, void const *restrict src, size_t n); - -#define U8TO32_LITTLE(p) \ - (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | \ - ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) - -// sigma contains the ChaCha constants, which happen to be an ASCII string. -static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', - '2', '-', 'b', 'y', 't', 'e', ' ', 'k' }; - -#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) - -// QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. 
-#define QUARTERROUND(a, b, c, d) \ - x[a] += x[b]; x[d] = ROTATE(x[d] ^ x[a], 16); \ - x[c] += x[d]; x[b] = ROTATE(x[b] ^ x[c], 12); \ - x[a] += x[b]; x[d] = ROTATE(x[d] ^ x[a], 8); \ - x[c] += x[d]; x[b] = ROTATE(x[b] ^ x[c], 7); - -#define U32TO8_LITTLE(p, v) \ - { \ - (p)[0] = (v >> 0) & 0xff; \ - (p)[1] = (v >> 8) & 0xff; \ - (p)[2] = (v >> 16) & 0xff; \ - (p)[3] = (v >> 24) & 0xff; \ - } - -// chacha_core performs 20 rounds of ChaCha on the input words in -// |input| and writes the 64 output bytes to |output|. -static void chacha_core(uint8_t output[64], const uint32_t input[16]) { - uint32_t x[16]; - int i; - - memcpy(x, input, sizeof(uint32_t) * 16); - for (i = 20; i > 0; i -= 2) { - QUARTERROUND(0, 4, 8, 12) - QUARTERROUND(1, 5, 9, 13) - QUARTERROUND(2, 6, 10, 14) - QUARTERROUND(3, 7, 11, 15) - QUARTERROUND(0, 5, 10, 15) - QUARTERROUND(1, 6, 11, 12) - QUARTERROUND(2, 7, 8, 13) - QUARTERROUND(3, 4, 9, 14) - } - - for (i = 0; i < 16; ++i) { - x[i] += input[i]; - } - for (i = 0; i < 16; ++i) { - U32TO8_LITTLE(output + 4 * i, x[i]); - } -} - -void boring_chacha20(uint8_t *out, const uint8_t *in, size_t in_len, - const uint8_t key[32], const uint8_t nonce[12], - uint32_t counter) { - - uint32_t input[16]; - uint8_t buf[64]; - size_t todo, i; - - input[0] = U8TO32_LITTLE(sigma + 0); - input[1] = U8TO32_LITTLE(sigma + 4); - input[2] = U8TO32_LITTLE(sigma + 8); - input[3] = U8TO32_LITTLE(sigma + 12); - - input[4] = U8TO32_LITTLE(key + 0); - input[5] = U8TO32_LITTLE(key + 4); - input[6] = U8TO32_LITTLE(key + 8); - input[7] = U8TO32_LITTLE(key + 12); - - input[8] = U8TO32_LITTLE(key + 16); - input[9] = U8TO32_LITTLE(key + 20); - input[10] = U8TO32_LITTLE(key + 24); - input[11] = U8TO32_LITTLE(key + 28); - - input[12] = counter; - input[13] = U8TO32_LITTLE(nonce + 0); - input[14] = U8TO32_LITTLE(nonce + 4); - input[15] = U8TO32_LITTLE(nonce + 8); - - while (in_len > 0) { - todo = sizeof(buf); - if (in_len < todo) { - todo = in_len; - } - - chacha_core(buf, input); - for (i = 0; i < todo; i++) { - out[i] = in[i] ^ buf[i]; - } - - out += todo; - in += todo; - in_len -= todo; - - input[12]++; - } -} - -///// poly1305 - -static uint32_t U8TO32_LE(const uint8_t *m) { - uint32_t r; - memcpy(&r, m, sizeof(r)); - return r; -} - -static void U32TO8_LE(uint8_t *m, uint32_t v) { - memcpy(m, &v, sizeof(v)); -} - - -static uint64_t mul32x32_64(uint32_t a, uint32_t b) { return (uint64_t)a * b; } - -struct poly1305_state_st { - uint32_t r0, r1, r2, r3, r4; - uint32_t s1, s2, s3, s4; - uint32_t h0, h1, h2, h3, h4; - uint8_t buf[16]; - unsigned int buf_used; - uint8_t key[16]; -}; - -static inline struct poly1305_state_st *poly1305_aligned_state( - poly1305_state *state) { - return (struct poly1305_state_st *)(((uintptr_t)state + 63) & ~63); -} - -static void poly1305_update(struct poly1305_state_st *state, const uint8_t *in, - size_t len) { - uint32_t t0, t1, t2, t3; - uint64_t t[5]; - uint32_t b; - uint64_t c; - size_t j; - uint8_t mp[16]; - - if (len < 16) { - goto poly1305_donna_atmost15bytes; - } - - poly1305_donna_16bytes: - t0 = U8TO32_LE(in); - t1 = U8TO32_LE(in + 4); - t2 = U8TO32_LE(in + 8); - t3 = U8TO32_LE(in + 12); - - in += 16; - len -= 16; - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8) | (1 << 24); - - poly1305_donna_mul: - t[0] = mul32x32_64(state->h0, state->r0) + mul32x32_64(state->h1, 
state->s4) + - mul32x32_64(state->h2, state->s3) + mul32x32_64(state->h3, state->s2) + - mul32x32_64(state->h4, state->s1); - t[1] = mul32x32_64(state->h0, state->r1) + mul32x32_64(state->h1, state->r0) + - mul32x32_64(state->h2, state->s4) + mul32x32_64(state->h3, state->s3) + - mul32x32_64(state->h4, state->s2); - t[2] = mul32x32_64(state->h0, state->r2) + mul32x32_64(state->h1, state->r1) + - mul32x32_64(state->h2, state->r0) + mul32x32_64(state->h3, state->s4) + - mul32x32_64(state->h4, state->s3); - t[3] = mul32x32_64(state->h0, state->r3) + mul32x32_64(state->h1, state->r2) + - mul32x32_64(state->h2, state->r1) + mul32x32_64(state->h3, state->r0) + - mul32x32_64(state->h4, state->s4); - t[4] = mul32x32_64(state->h0, state->r4) + mul32x32_64(state->h1, state->r3) + - mul32x32_64(state->h2, state->r2) + mul32x32_64(state->h3, state->r1) + - mul32x32_64(state->h4, state->r0); - - state->h0 = (uint32_t)t[0] & 0x3ffffff; - c = (t[0] >> 26); - t[1] += c; - state->h1 = (uint32_t)t[1] & 0x3ffffff; - b = (uint32_t)(t[1] >> 26); - t[2] += b; - state->h2 = (uint32_t)t[2] & 0x3ffffff; - b = (uint32_t)(t[2] >> 26); - t[3] += b; - state->h3 = (uint32_t)t[3] & 0x3ffffff; - b = (uint32_t)(t[3] >> 26); - t[4] += b; - state->h4 = (uint32_t)t[4] & 0x3ffffff; - b = (uint32_t)(t[4] >> 26); - state->h0 += b * 5; - - if (len >= 16) { - goto poly1305_donna_16bytes; - } - - // final bytes - poly1305_donna_atmost15bytes: - if (!len) { - return; - } - - for (j = 0; j < len; j++) { - mp[j] = in[j]; - } - mp[j++] = 1; - for (; j < 16; j++) { - mp[j] = 0; - } - len = 0; - - t0 = U8TO32_LE(mp + 0); - t1 = U8TO32_LE(mp + 4); - t2 = U8TO32_LE(mp + 8); - t3 = U8TO32_LE(mp + 12); - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8); - - goto poly1305_donna_mul; -} - -void boring_poly1305_init(poly1305_state *statep, const uint8_t key[32]) { - struct poly1305_state_st *state = poly1305_aligned_state(statep); - uint32_t t0, t1, t2, t3; - - t0 = U8TO32_LE(key + 0); - t1 = U8TO32_LE(key + 4); - t2 = U8TO32_LE(key + 8); - t3 = U8TO32_LE(key + 12); - - // precompute multipliers - state->r0 = t0 & 0x3ffffff; - t0 >>= 26; - t0 |= t1 << 6; - state->r1 = t0 & 0x3ffff03; - t1 >>= 20; - t1 |= t2 << 12; - state->r2 = t1 & 0x3ffc0ff; - t2 >>= 14; - t2 |= t3 << 18; - state->r3 = t2 & 0x3f03fff; - t3 >>= 8; - state->r4 = t3 & 0x00fffff; - - state->s1 = state->r1 * 5; - state->s2 = state->r2 * 5; - state->s3 = state->r3 * 5; - state->s4 = state->r4 * 5; - - // init state - state->h0 = 0; - state->h1 = 0; - state->h2 = 0; - state->h3 = 0; - state->h4 = 0; - - state->buf_used = 0; - memcpy(state->key, key + 16, sizeof(state->key)); -} - -void boring_poly1305_update(poly1305_state *statep, const uint8_t *in, - size_t in_len) { - unsigned int i; - struct poly1305_state_st *state = poly1305_aligned_state(statep); - - if (state->buf_used) { - unsigned todo = 16 - state->buf_used; - if (todo > in_len) { - todo = (unsigned)in_len; - } - for (i = 0; i < todo; i++) { - state->buf[state->buf_used + i] = in[i]; - } - state->buf_used += todo; - in_len -= todo; - in += todo; - - if (state->buf_used == 16) { - poly1305_update(state, state->buf, 16); - state->buf_used = 0; - } - } - - if (in_len >= 16) { - size_t todo = in_len & ~0xf; - poly1305_update(state, in, todo); - in += todo; - in_len &= 0xf; - } - - if (in_len) { - for (i = 0; i < in_len; i++) { - 
state->buf[i] = in[i]; - } - state->buf_used = (unsigned)in_len; - } -} - -void boring_poly1305_finish(poly1305_state *statep, uint8_t mac[16]) { - struct poly1305_state_st *state = poly1305_aligned_state(statep); - uint64_t f0, f1, f2, f3; - uint32_t g0, g1, g2, g3, g4; - uint32_t b, nb; - - if (state->buf_used) { - poly1305_update(state, state->buf, state->buf_used); - } - - b = state->h0 >> 26; - state->h0 = state->h0 & 0x3ffffff; - state->h1 += b; - b = state->h1 >> 26; - state->h1 = state->h1 & 0x3ffffff; - state->h2 += b; - b = state->h2 >> 26; - state->h2 = state->h2 & 0x3ffffff; - state->h3 += b; - b = state->h3 >> 26; - state->h3 = state->h3 & 0x3ffffff; - state->h4 += b; - b = state->h4 >> 26; - state->h4 = state->h4 & 0x3ffffff; - state->h0 += b * 5; - - g0 = state->h0 + 5; - b = g0 >> 26; - g0 &= 0x3ffffff; - g1 = state->h1 + b; - b = g1 >> 26; - g1 &= 0x3ffffff; - g2 = state->h2 + b; - b = g2 >> 26; - g2 &= 0x3ffffff; - g3 = state->h3 + b; - b = g3 >> 26; - g3 &= 0x3ffffff; - g4 = state->h4 + b - (1 << 26); - - b = (g4 >> 31) - 1; - nb = ~b; - state->h0 = (state->h0 & nb) | (g0 & b); - state->h1 = (state->h1 & nb) | (g1 & b); - state->h2 = (state->h2 & nb) | (g2 & b); - state->h3 = (state->h3 & nb) | (g3 & b); - state->h4 = (state->h4 & nb) | (g4 & b); - - f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); - f1 = ((state->h1 >> 6) | (state->h2 << 20)) + - (uint64_t)U8TO32_LE(&state->key[4]); - f2 = ((state->h2 >> 12) | (state->h3 << 14)) + - (uint64_t)U8TO32_LE(&state->key[8]); - f3 = ((state->h3 >> 18) | (state->h4 << 8)) + - (uint64_t)U8TO32_LE(&state->key[12]); - - U32TO8_LE(&mac[0], f0); - f1 += (f0 >> 32); - U32TO8_LE(&mac[4], f1); - f2 += (f1 >> 32); - U32TO8_LE(&mac[8], f2); - f3 += (f2 >> 32); - U32TO8_LE(&mac[12], f3); -} diff --git a/tests/rvv_bench/_include/thirdparty/boring.h b/tests/rvv_bench/_include/thirdparty/boring.h deleted file mode 100644 index 3fb2300b6..000000000 --- a/tests/rvv_bench/_include/thirdparty/boring.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2014, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ - -#include <stddef.h> -#include <stdint.h> - -void boring_chacha20(uint8_t *out, const uint8_t *in, - size_t in_len, const uint8_t key[32], - const uint8_t nonce[12], uint32_t counter); - -typedef uint8_t poly1305_state[512]; - -void boring_poly1305_init(poly1305_state *state, - const uint8_t key[32]); - -void boring_poly1305_update(poly1305_state *state, - const uint8_t *in, size_t in_len); - -void boring_poly1305_finish(poly1305_state *state, - uint8_t mac[16]); diff --git a/tests/rvv_bench/_include/thirdparty/rvv-rollback.S b/tests/rvv_bench/_include/thirdparty/rvv-rollback.S deleted file mode 100644 index e941604bb..000000000 --- a/tests/rvv_bench/_include/thirdparty/rvv-rollback.S +++ /dev/null @@ -1,255 +0,0 @@ -# rvv-rollback.S -- A minimal benchmarking library -# Olaf Bernstein -# Distributed under the MIT license, see license at the end of the file. -# New versions available at https://gist.github.com/camel-cdr/cfd9ba2b8754b521edf4892fe19c7031 -# Conversions taken from https://github.com/RISCVtestbed/rvv-rollback - -.macro vle32.v a:vararg - vlw.v \a -.endm -.macro vle16.v a:vararg - vlh.v \a -.endm -.macro vle8.v a:vararg - vlb.v \a -.endm -.macro vle32ff.v a:vararg - vlwff.v \a -.endm -.macro vle16ff.v a:vararg - vlhff.v \a -.endm -.macro vle8ff.v a:vararg - vlbff.v \a -.endm -.macro vse32.v a:vararg - vsw.v \a -.endm -.macro vse16.v a:vararg - vsh.v \a -.endm -.macro vse8.v a:vararg - vsb.v \a -.endm -.macro vluxei32.v a:vararg - vlxw.v \a -.endm -.macro vluxei16.v a:vararg - vlxh.v \a -.endm -.macro vluxei8.v a:vararg - vlxb.v \a -.endm -.macro vsuxei32.v a:vararg - vsuxw.v \a -.endm -.macro vsuxei16.v a:vararg - vsuxh.v \a -.endm -.macro vsuxei8.v a:vararg - vsuxb.v \a -.endm -.macro vlse32.v a:vararg - vlsw.v \a -.endm -.macro vlse16.v a:vararg - vlsh.v \a -.endm -.macro vlse8.v a:vararg - vlsb.v \a -.endm -.macro vsse32.v a:vararg - vssw.v \a -.endm -.macro vsse16.v a:vararg - vssh.v \a -.endm -.macro vsse8.v a:vararg - vssb.v \a -.endm -.macro vloxei32.v a:vararg - vlxw.v \a -.endm -.macro vloxei16.v a:vararg - vlxh.v \a -.endm -.macro vloxei8.v a:vararg - vlxb.v \a -.endm -.macro vsoxei32.v a:vararg - vsxw.v \a -.endm -.macro vsoxei16.v a:vararg - vsxh.v \a -.endm -.macro vsoxei8.v a:vararg - vsxb.v \a -.endm -.macro vfncvt.xu.f.w a:vararg - vfncvt.xu.f.v \a -.endm -.macro vfncvt.x.f.w a:vararg - vfncvt.x.f.v \a -.endm -.macro vfncvt.f.xu.w a:vararg - vfncvt.f.xu.v \a -.endm -.macro vfncvt.f.x.w a:vararg - vfncvt.f.x.v \a -.endm -.macro vfncvt.f.f.w a:vararg - vfncvt.f.f.v \a -.endm -.macro vfredusum a:vararg - vfredsum \a -.endm -.macro vfwredusum.vs a:vararg - vfwredsum.vs \a -.endm -.macro vnclip.wv a:vararg - vnclip.vv \a -.endm -.macro vnclip.wx a:vararg - vnclip.vx \a -.endm -.macro vnclip.wi a:vararg - vnclip.vi \a -.endm -.macro vnclipu.wv a:vararg - vnclipu.vv \a -.endm -.macro vnclipu.wx a:vararg - vnclipu.vx \a -.endm -.macro vnclipu.wi a:vararg - vnclipu.vi \a -.endm -.macro vnsra.wv a:vararg - vnsra.vv \a -.endm -.macro vnsra.wx a:vararg - vnsra.vx \a -.endm -.macro vnsra.wi a:vararg - vnsra.vi \a -.endm -.macro vnsrl.wv a:vararg - vnsrl.vv \a -.endm -.macro vnsrl.wx a:vararg - vnsrl.vx \a -.endm -.macro vnsrl.wi a:vararg - vnsrl.vi \a -.endm -.macro vmandn.mm a:vararg - vmandnot.mm \a -.endm -.macro vmorn.mm a:vararg - vmornot.mm \a -.endm -.macro vmmv.m a:vararg - vmcpy.m \a -.endm -.macro vcpop.m a:vararg - vmpopc.m \a -.endm -.macro vpop.m a:vararg - vmpopc.m \a -.endm -.macro vfirst.m a:vararg - vmfirst.m \a -.endm - -.macro define_for_all_nf prefix suffix prefix2 suffix2
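# Editor's note (annotation, not part of the original file): for each
# segment count NF in 2..8, the define_for_all_nf macro defines one alias
# that rewrites an RVV 1.0 segment mnemonic into its RVV 0.7 spelling.
# For example, "define_for_all_nf vlseg e8.v vlseg b.v" generates, among
# others:
#   .macro vlseg2e8.v a:vararg
#       vlseg2b.v \a
#   .endm
# so a 1.0-style "vlseg2e8.v v0, (a0)" assembles as 0.7's "vlseg2b.v v0, (a0)".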
- .macro \prefix\()2\suffix a:vararg - \prefix2\()2\suffix2 \a - .endm - .macro \prefix\()3\suffix a:vararg - \prefix2\()3\suffix2 \a - .endm - .macro \prefix\()4\suffix a:vararg - \prefix2\()4\suffix2 \a - .endm - .macro \prefix\()5\suffix a:vararg - \prefix2\()5\suffix2 \a - .endm - .macro \prefix\()6\suffix a:vararg - \prefix2\()6\suffix2 \a - .endm - .macro \prefix\()7\suffix a:vararg - \prefix2\()7\suffix2 \a - .endm - .macro \prefix\()8\suffix a:vararg - \prefix2\()8\suffix2 \a - .endm -.endm -define_for_all_nf vlseg e8.v vlseg b.v -define_for_all_nf vlseg e16.v vlseg h.v -define_for_all_nf vlseg e32.v vlseg w.v - -define_for_all_nf vsseg e8.v vsseg b.v -define_for_all_nf vsseg e16.v vsseg h.v -define_for_all_nf vsseg e32.v vsseg w.v - -define_for_all_nf vlsseg e8.v vlsseg bu.v -define_for_all_nf vlsseg e16.v vlsseg hu.v -define_for_all_nf vlsseg e32.v vlsseg wu.v - -define_for_all_nf vssseg e8.v vssseg b.v -define_for_all_nf vssseg e16.v vssseg h.v -define_for_all_nf vssseg e32.v vssseg w.v - -define_for_all_nf vloxseg e8.v vlxseg b.v -define_for_all_nf vloxseg e16.v vlxseg h.v -define_for_all_nf vloxseg e32.v vlxseg w.v -define_for_all_nf vluxseg e8.v vlxseg b.v -define_for_all_nf vluxseg e16.v vlxseg h.v -define_for_all_nf vluxseg e32.v vlxseg w.v - -define_for_all_nf vsoxseg e8.v vsxseg b.v -define_for_all_nf vsoxseg e16.v vsxseg h.v -define_for_all_nf vsoxseg e32.v vsxseg w.v -define_for_all_nf vsuxseg e8.v vsxseg b.v -define_for_all_nf vsuxseg e16.v vsxseg h.v -define_for_all_nf vsuxseg e32.v vsxseg w.v - - -.macro vsetvl0p7 rd, rs1, rs2, T=1, M=1 - vsetvl \rd, \rs1, \rs2 -.endm -.macro vsetvli0p7 rd, rs1, e=e8, m=m1, T=1, M=1 - .ifc \m, mf2 - NOT SUPPORTED IN rvv0.7 - .endif - .ifc \m, mf4 - NOT SUPPORTED IN rvv0.7 - .endif - .ifc \m, mf8 - NOT SUPPORTED IN rvv0.7 - .endif - vsetvli \rd, \rs1, \e, \m -.endm - -#define vsetvl vsetvl0p7 -#define vsetvli vsetvli0p7 - - - -# Copyright (c) 2023 Olaf Berstein -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
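# Editor's note (annotation, not part of the original file): the net effect
# of rvv-rollback.S is that RVV 1.0 configuration code such as
#   vsetvli t0, a2, e8, m1, ta, ma
# is rewritten, via the vsetvli -> vsetvli0p7 override above, to the RVV 0.7
# form without the tail/mask-agnostic flags:
#   vsetvli t0, a2, e8, m1
# while any fractional LMUL (mf2/mf4/mf8) expands to the deliberately
# invalid text "NOT SUPPORTED IN rvv0.7" and fails at assembly time.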
- diff --git a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.S b/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.S deleted file mode 100644 index b363d7830..000000000 --- a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.S +++ /dev/null @@ -1,68 +0,0 @@ -#ifdef MX - -#if MX_N == 4 || MX_N == 2 || MX_N == 1 - -.global MX(ascii_to_utf16_rvv_vsseg_) -.type MX(ascii_to_utf16_rvv_vsseg_), @function -MX(ascii_to_utf16_rvv_vsseg_): - vsetvli t0, x0, e8, MX2(), ta, ma - vmv.v.i v0, 0 -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) - vsseg2e8.v v0, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 1 - add a0, a0, t0 - bnez a2, 1b - ret - - - -.global MX(ascii_to_utf16_rvv_ext_) -.type MX(ascii_to_utf16_rvv_ext_), @function -MX(ascii_to_utf16_rvv_ext_): -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) -#if HAS_RVV_1_0 - vsetvli x0, x0, e16, MX2(), ta, ma - vzext.vf2 v8, v0 -#else - vwaddu.vx v8, v0, x0 - vsetvli x0, a2, e16, MX2(), ta, ma -#endif - vse16.v v8, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 1 - add a0, a0, t0 - bnez a2, 1b - ret - - -.global MX(ascii_to_utf16_rvv_vss_) -.type MX(ascii_to_utf16_rvv_vss_), @function -MX(ascii_to_utf16_rvv_vss_): - vsetvli t0, x0, e8, MX2(), ta, ma - vmv.v.i v0, 0 - li a3, 2 -1: - vsetvli t0, a2, e16, MX2(), ta, ma - vse16.v v0, (a0) - - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v8, (a1) - vsse8.v v8, (a0), a3 - - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 1 - add a0, a0, t0 - bnez a2, 1b - ret - -#endif -#endif - diff --git a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.c b/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.c deleted file mode 100644 index fc3fba747..000000000 --- a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.c +++ /dev/null @@ -1,63 +0,0 @@ -#include "bench.h" - -void -ascii_to_utf16_scalar(uint16_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++, BENCH_CLOBBER(); -} - -void -ascii_to_utf16_scalar_autovec(uint16_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++; -} - -#define IMPLS(f) \ - f(scalar) f(scalar_autovec) \ - f(rvv_ext_m1) f(rvv_ext_m2) f(rvv_ext_m4) \ - f(rvv_vsseg_m1) f(rvv_vsseg_m2) f(rvv_vsseg_m4) \ - f(rvv_vss_m1) f(rvv_vss_m2) f(rvv_vss_m4) \ - -typedef void Func(uint16_t *restrict dest, uint8_t const *restrict src, size_t len); - -#define DECLARE(f) extern Func ascii_to_utf16_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &ascii_to_utf16_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint16_t *dest; -uint8_t *src; - -void init(void) { } - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t dOff, size_t sOff) { - dest = (uint16_t*)mem + dOff/2; - src = (uint8_t*)(dest + 9 + MAX_MEM/3) + sOff; - memrand(src, n+9); - for (size_t i = 0; i < n+9; ++i) src[i] |= 0x7F; - memset(dest, 1, (n+9)*2); -} - -BENCH(base) { - common(n, urand() & 255, urand() & 255); - TIME f(dest, src, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0, 0); - TIME f(dest, src, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/3 - 512-9*2, "ascii to utf16", bench_base }, - { MAX_MEM/3 - 512-9*2, "ascii to utf16 aligned", bench_aligned }, -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.S b/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.S deleted file mode 100644 index 9cf21fad3..000000000 --- a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.S +++ /dev/null @@ -1,66 +0,0 @@ -#ifdef MX - -#if 
MX_N == 2 || MX_N == 1 - -.global MX(ascii_to_utf32_rvv_vsseg_) -MX(ascii_to_utf32_rvv_vsseg_): - vsetvli t0, x0, e8, MX4(), ta, ma - vmv.v.i v0, 0 -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) - vsseg4e8.v v0, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a2, 1b - ret - - -.global MX(ascii_to_utf32_rvv_ext_) -MX(ascii_to_utf32_rvv_ext_): -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) -#if HAS_RVV_1_0 - vsetvli x0, x0, e32, MX4(), ta, ma - vzext.vf4 v8, v0 -#else - vwaddu.vx v16, v0, x0 - vsetvli x0, a2, e16, MX2(), ta, ma - vwaddu.vx v8, v16, x0 - vsetvli x0, a2, e32, MX4(), ta, ma -#endif - vse32.v v8, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a2, 1b - ret - - -.global MX(ascii_to_utf32_rvv_vss_) -MX(ascii_to_utf32_rvv_vss_): - vsetvli t0, x0, e8, MX4(), ta, ma - vmv.v.i v0, 0 - li a3, 4 -1: - vsetvli t0, a2, e32, MX4(), ta, ma - vse32.v v0, (a0) - - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v8, (a1) - vsse8.v v8, (a0), a3 - - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a2, 1b - ret - -#endif -#endif - diff --git a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.c b/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.c deleted file mode 100644 index 968493037..000000000 --- a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.c +++ /dev/null @@ -1,63 +0,0 @@ -#include "bench.h" - -void -ascii_to_utf32_scalar(uint32_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++, BENCH_CLOBBER(); -} - -void -ascii_to_utf32_scalar_autovec(uint32_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++; -} - -#define IMPLS(f) \ - f(scalar) f(scalar_autovec) \ - f(rvv_ext_m1) f(rvv_ext_m2) \ - f(rvv_vsseg_m1) f(rvv_vsseg_m2) \ - f(rvv_vss_m1) f(rvv_vss_m2) \ - -typedef void Func(uint32_t *restrict dest, uint8_t const *restrict src, size_t len); - -#define DECLARE(f) extern Func ascii_to_utf32_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &ascii_to_utf32_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint32_t *dest; -uint8_t *src; - -void init(void) { } - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t dOff, size_t sOff) { - dest = (uint32_t*)mem + dOff/4; - src = (uint8_t*)(dest + 9 + MAX_MEM/5) + sOff; - memrand(src, n+9); - for (size_t i = 0; i < n+9; ++i) src[i] |= 0x7F; - memset(dest, 1, (n+9)*4); -} - -BENCH(base) { - common(n, urand() & 255, urand() & 255); - TIME f(dest, src, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0, 0); - TIME f(dest, src, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/5 - 512-9*2, "ascii to utf32", bench_base }, - { MAX_MEM/5 - 512-9*2, "ascii to utf32 aligned", bench_aligned }, -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/byteswap/byteswap.S b/tests/rvv_bench/byteswap/byteswap.S deleted file mode 100644 index 79154ef68..000000000 --- a/tests/rvv_bench/byteswap/byteswap.S +++ /dev/null @@ -1,81 +0,0 @@ -/* - * TODO: This currently only works for VLEN<=256. - * I think rvv 1.0 should only vrgatherei16.vv here in the future. 
- */ - -#ifdef MX - - -# a0 = ptr, a1 = len -.global MX(byteswap32_rvv_gather_) -MX(byteswap32_rvv_gather_): - vsetvli t0, x0, e8, MX(), ta, ma - vid.v v0 - vand.vi v8, v0, 3 - vrsub.vi v8, v8, 3 - vsrl.vi v0, v0, 2 - vsll.vi v0, v0, 2 - vadd.vv v0, v0, v8 # i/8*8 + (7-1%8) -1: - vsetvli t0, a1, e32, MX(), ta, ma - vle32.v v8, (a0) - slli t1, t0, 2 - vsetvli x0, t1, e8, MX(), ta, ma - vrgather.vv v16, v8, v0 - vsetvli x0, t0, e32, MX(), ta, ma - vse32.v v16, (a0) - sub a1, a1, t0 - add a0, a0, t1 - bnez a1, 1b - ret -#endif - -#if MX_N == 2 - -.macro byteswap32_rvv_m1_gathers n - .global byteswap32_rvv_m1_gathers_m\n - byteswap32_rvv_m1_gathers_m\n: - vsetvli t0, x0, e8, m1, ta, ma - vid.v v0 - vand.vi v8, v0, 3 - vrsub.vi v8, v8, 3 - vsrl.vi v0, v0, 2 - vsll.vi v0, v0, 2 - vadd.vv v0, v0, v8 # i/8*8 + (7-1%8) - 1: - vsetvli t0, a1, e32, m\n, ta, ma - vle32.v v8, (a0) - vsetvli t1, x0, e8, m1, ta, ma - vrgather.vv v16, v8, v0 - .ifge \n-2 - vrgather.vv v17, v9, v0 - .ifge \n-4 - vrgather.vv v18, v10, v0 - vrgather.vv v19, v11, v0 - .ifge \n-8 - vrgather.vv v20, v12, v0 - vrgather.vv v21, v13, v0 - vrgather.vv v22, v14, v0 - vrgather.vv v23, v15, v0 - .endif - .endif - .endif - vsetvli x0, t0, e32, m\n, ta, ma - vse32.v v16, (a0) - sub a1, a1, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a1, 1b - ret -.endm - -byteswap32_rvv_m1_gathers 2 -#endif -#if MX_N == 4 -byteswap32_rvv_m1_gathers 4 -#endif -#if MX_N == 8 -byteswap32_rvv_m1_gathers 8 -#endif - - diff --git a/tests/rvv_bench/byteswap/byteswap.c b/tests/rvv_bench/byteswap/byteswap.c deleted file mode 100644 index dff204b72..000000000 --- a/tests/rvv_bench/byteswap/byteswap.c +++ /dev/null @@ -1,79 +0,0 @@ -#include "bench.h" - -void -byteswap32_scalar(uint32_t *ptr, size_t n) -{ - for (uint8_t *p = (uint8_t*)ptr; n--; p += 4) { - uint8_t p0 = p[0], p1 = p[1], p2 = p[2], p3 = p[3]; - p[3] = p0; BENCH_CLOBBER(); - p[2] = p1; BENCH_CLOBBER(); - p[1] = p2; BENCH_CLOBBER(); - p[0] = p3; BENCH_CLOBBER(); - } -} - -void -byteswap32_scalar_autovec(uint32_t *ptr, size_t n) -{ - for (uint8_t *p = (uint8_t*)ptr; n--; p += 4) { - uint8_t p0 = p[0], p1 = p[1], p2 = p[2], p3 = p[3]; - p[3] = p0; - p[2] = p1; - p[1] = p2; - p[0] = p3; - } -} - -#if __riscv_zbb -void -byteswap32_SWAR_rev8(uint32_t *ptr, size_t n) -{ - while (n--) { - *ptr = __builtin_bswap32(*ptr); - ++ptr; - BENCH_CLOBBER(); - } -} -#define REV8(f) f(SWAR_rev8) -#else -#define REV8(f) -#endif - - -#define IMPLS(f) \ - f(scalar) \ - f(scalar_autovec) \ - REV8(f) \ - MX(f, rvv_gather) \ - f(rvv_m1_gathers_m2) \ - f(rvv_m1_gathers_m4) \ - f(rvv_m1_gathers_m8) \ - -typedef void Func(uint32_t *ptr, size_t n); - -#define DECLARE(f) extern Func byteswap32_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &byteswap32_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint32_t *ptr; - -void init(void) { ptr = (uint32_t*)mem; } - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n; ++i) - sum = uhash(sum) + ptr[i]; - return sum; -} - -BENCH(base) { - memrand(ptr, n * sizeof *ptr); - TIME f(ptr, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/4, "byteswap32", bench_base } -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/chacha20/chacha20.S b/tests/rvv_bench/chacha20/chacha20.S deleted file mode 100644 index 9c62caeba..000000000 --- a/tests/rvv_bench/chacha20/chacha20.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifndef MX -#if __riscv_xlen >= 64 -#include "rvv-chacha-poly/vchacha.s" -#endif -#endif diff --git a/tests/rvv_bench/chacha20/chacha20.c b/tests/rvv_bench/chacha20/chacha20.c 
deleted file mode 100644 index 7d6328b54..000000000 --- a/tests/rvv_bench/chacha20/chacha20.c +++ /dev/null @@ -1,61 +0,0 @@ -#include "bench.h" -#if __riscv_xlen >= 64 -#include "../thirdparty/boring.h" - -uint8_t *dest, *src; -uint8_t key[32], nonce[12]; -uint32_t counter; - - -extern void vector_chacha20( - uint8_t *out, const uint8_t *in, - size_t in_len, const uint8_t key[32], - const uint8_t nonce[12], uint32_t counter); - -static void -chacha20_boring(void *restrict dest, void const *restrict src, size_t n) { - boring_chacha20(dest, src, n, key, nonce, counter); -} - -static void -chacha20_rvv(void *restrict dest, void const *restrict src, size_t n) { - vector_chacha20(dest, src, n, key, nonce, counter); -} - -typedef void *Func(void *restrict dest, void const *restrict src, size_t n); - -Impl impls[] = { - { "boring", &chacha20_boring }, - { "rvv", &chacha20_rvv }, -}; - -void init(void) { - memrand(key, sizeof key); - memrand(nonce, sizeof nonce); - counter = 0; -} - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n+16; ++i) - sum = uhash(sum) + mem[i]; - return sum; -} - -BENCH(aligned) { - memset(mem, 0, n+16); - TIME f(mem, mem + MAX_MEM/2 + 16, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/2 - 16, "chacha20 aligned", bench_aligned } -}; BENCH_MAIN(impls, benches) - - -#include "../thirdparty/boring.c" -#else -void init(void) {} -Impl impls[] = {}; -Bench benches[] = {}; -BENCH_MAIN(impls, benches) -#endif diff --git a/tests/rvv_bench/default.nix b/tests/rvv_bench/default.nix index 5a5c08121..2e238ee33 100644 --- a/tests/rvv_bench/default.nix +++ b/tests/rvv_bench/default.nix @@ -1,40 +1,67 @@ { lib -, getTestRequiredFeatures +, fetchFromGitHub , linkerScript , makeBuilder -, findAndBuild , t1main -, makeEmuResult +, filterByFeatures }: let - include = ./_include; + src = fetchFromGitHub { + owner = "camel-cdr"; + repo = "rvv-bench"; + rev = "5dc20c3596b3aa8412804e2d169d1b175bae927a"; + hash = "sha256-5A079sl4g7FIWgCYykLgTZXrmyfIblyXtxeh1AwqKiU="; + fetchSubmodules = true; + }; + + nonFpCases = [ + "ascii_to_utf16" + "ascii_to_utf32" + "byteswap" + "chacha20" + "memcpy" + "memset" + "mergelines" + "poly1305" + "strlen" + "utf8_count" + ]; + + fpCases = [ + "mandelbrot" + ]; + + cases = nonFpCases ++ fpCases; + builder = makeBuilder { casePrefix = "rvv_bench"; }; - build = { caseName, sourcePath }: + build = caseName: let drv = builder { - inherit caseName; + inherit caseName src; - src = sourcePath; + patches = [ ./t1_runtime.patch ]; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = lib.optionals (lib.elem caseName fpCases) { extensions = [ "zve32f" ]; }; buildPhase = '' runHook preBuild + pushd bench >/dev/null - $CC -E -DINC=$PWD/${caseName}.S -E ${include}/template.S -o functions.S - $CC -I${include} ${caseName}.c -T${linkerScript} ${t1main} functions.S -o $pname.elf + $CC -E -DINC=$PWD/${caseName}.S template.S -E -o functions.S + $CC ${caseName}.c -T${linkerScript} ${t1main} functions.S -o ../$pname.elf + popd >/dev/null runHook postBuild ''; - meta.description = "test case '${caseName}', written in C intrinsic"; - - passthru.emu-result = makeEmuResult drv; + meta.description = "test case '${caseName}' from rvv-bench"; }; in drv; in -findAndBuild ./. 
build +lib.filterAttrs + filterByFeatures + (lib.genAttrs cases build) diff --git a/tests/rvv_bench/mandelbrot/features-required.json b/tests/rvv_bench/mandelbrot/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/rvv_bench/mandelbrot/features-required.json +++ b/tests/rvv_bench/mandelbrot/features-required.json @@ -1 +1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } diff --git a/tests/rvv_bench/mandelbrot/mandelbrot.S b/tests/rvv_bench/mandelbrot/mandelbrot.S deleted file mode 100644 index 55224666a..000000000 --- a/tests/rvv_bench/mandelbrot/mandelbrot.S +++ /dev/null @@ -1,358 +0,0 @@ -#if 0 - -void -mandelbrot_rvv(size_t width, size_t maxIter, uint32_t *res) -{ - vfloat32m2_t cx, cy, zx, zy, zx2, zy2; - vuint32m2_t viter; - vbool16_t mask; - - for (size_t y = 0; y < width; ++y) { - size_t vl, x = width; - while (x > 0) { - x -= vl = __riscv_vsetvl_e32m2(x); - - mask = __riscv_vmset_m_b16(vl); - viter = __riscv_vmv_v_x_u32m2(0, vl); - - cx = __riscv_vfcvt_f_xu_v_f32m2(__riscv_vadd_vx_u32m2(__riscv_viota_m_u32m2(mask, vl), x, vl), vl); - cy = __riscv_vfmv_v_f_f32m2(y, vl); - - cx = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vf_f32m2(cx, 2.0f / width, vl), -1.5f, vl); - cy = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vf_f32m2(cy, 2.0f / width, vl), -1, vl); - - zx = zy = zx2 = zy2 = __riscv_vfmv_v_f_f32m2(0, vl); - - size_t iter = 0; - while (iter < maxIter && __riscv_vfirst_m_b16(mask, vl) >= 0) { - mask = __riscv_vmflt_vf_f32m2_b16(__riscv_vfadd_vv_f32m2(zx2, zy2, vl), 4, vl); - zx2 = __riscv_vfadd_vv_f32m2(__riscv_vfsub_vv_f32m2(zx2, zy2, vl), cx, vl); - zy = __riscv_vfmacc_vv_f32m2(cy, __riscv_vfadd_vv_f32m2(zx, zx, vl), zy, vl); - zx = zx2; - zx2 = __riscv_vfmul_vv_f32m2(zx, zx, vl); - zy2 = __riscv_vfmul_vv_f32m2(zy, zy, vl); - ++iter; - viter = __riscv_vmerge_vxm_u32m2(viter, iter, mask, vl); - } - __riscv_vse32_v_u32m2(res + x, viter, vl); - } - res += width; - } -} - -#endif - -#if MX_N > 0 && MX_N <= 2 - -#if HAS_F16 -.global MX(mandelbrot_rvv_f16_) # generated by clang -MX(rvv_f16_m1p5): - .half 0xbe00 # half -1.5 -MX(rvv_f16_m1): - .half 0xbc00 # half -1 -MX(rvv_f16_p4): - .half 0x4400 # half 4 -MX(mandelbrot_rvv_f16_): - beqz a0, MX(rvv_f16_13) - beqz a1, MX(rvv_f16_9) - li a7, 0 - fcvt.s.wu fa2, a0 - lui a3, 262144 - fmv.w.x fa1, a3 - la a3, MX(rvv_f16_m1p5) - flh fa5, (a3) - la a3, MX(rvv_f16_m1) - flh fa4, (a3) - la a3, MX(rvv_f16_p4) - flh fa3, (a3) - fdiv.s fa2, fa1, fa2 - fcvt.h.s fa2, fa2 - slli a6, a0, 2 - j MX(rvv_f16_4) -MX(rvv_f16_3): - addi a7, a7, 1 - add a2, a2, a6 - beq a7, a0, MX(rvv_f16_13) -MX(rvv_f16_4): - fcvt.s.wu fa1, a7 - fcvt.h.s fa1, fa1 - mv t0, a0 - j MX(rvv_f16_6) -MX(rvv_f16_5): - slli a3, t0, 2 - add a3, a3, a2 - vsetvli zero, zero, e32, MX2(), ta, ma - vse32.v v8, (a3) - beqz t0, MX(rvv_f16_3) -MX(rvv_f16_6): - vsetvli a3, t0, e32, MX2(), ta, ma - sub t0, t0, a3 - vmset.m v0 - vmv.v.i v8, 0 - vsetvli zero, zero, e16, MX(), ta, ma - viota.m v12, v0 - vadd.vx v12, v12, t0 - vfcvt.f.xu.v v12, v12 - vfmv.v.f v14, fa1 - vfmul.vf v12, v12, fa2 - vfadd.vf v12, v12, fa5 - vfmul.vf v14, v14, fa2 - vfadd.vf v14, v14, fa4 - vmv.v.i v20, 0 - li a4, 1 - mv a3, a1 - vmv.v.i v16, 0 - vmv.v.i v18, 0 - vmv.v.i v22, 0 -MX(rvv_f16_7): -#if HAS_RVV_1_0 || MX_N >= 2 - vsetvli zero, zero, e8, MXf2(), ta, ma -#else - vsetvli zero, zero, e8, m1, ta, ma -#endif - vfirst.m a5, v0 - bltz a5, MX(rvv_f16_5) - vsetvli zero, zero, e16, MX(), ta, ma - vfadd.vv v24, v18, v22 - vmflt.vf v0, v24, fa3 - vfsub.vv v18, v18, v22 - vfadd.vv v20, v20, v20 - vfadd.vv v24, v18, v12 - 
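# Editor's note (annotation, not part of the original file): at this point
# v18 holds zx^2 - zy^2, so the vfadd above produced the next real part
# zx' = zx^2 - zy^2 + cx in v24, and the vfmadd below forms the next
# imaginary part zy' = (2*zx)*zy + cy, with v20 = 2*zx from the earlier
# doubling vfadd and v14 = cy.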
vfmadd.vv v16, v20, v14 - vfmul.vv v18, v24, v24 - vfmul.vv v22, v16, v16 - vsetvli zero, zero, e32, MX2(), ta, ma - vmerge.vxm v8, v8, a4, v0 - addi a3, a3, -1 - addi a4, a4, 1 -#if HAS_RVV_1_0 - vmv2r.v v20, v24 -#else - vsetvli zero, zero, e32, m2 - vmv.v.v v20, v24 -#endif - bnez a3, MX(rvv_f16_7) - j MX(rvv_f16_5) -MX(rvv_f16_9): - slli a3, a0, 2 -MX(rvv_f16_10): - mv a4, a0 -MX(rvv_f16_11): - vsetvli a5, a4, e32, MX2(), ta, ma - sub a4, a4, a5 - vmv.v.i v8, 0 - slli a5, a4, 2 - add a5, a5, a2 - vse32.v v8, (a5) - bnez a4, MX(rvv_f16_11) - addi a1, a1, 1 - add a2, a2, a3 - bne a1, a0, MX(rvv_f16_10) -MX(rvv_f16_13): - ret -#endif - - -.global MX(mandelbrot_rvv_f32_) # generated by clang -MX(mandelbrot_rvv_f32_): - beqz a0, MX(rvv_f32_13) - beqz a1, MX(rvv_f32_9) - li a7, 0 - fcvt.s.wu fa5, a0 - lui a3, 262144 - fmv.w.x fa4, a3 - fdiv.s fa5, fa4, fa5 - lui a3, 785408 - fmv.w.x fa4, a3 - lui a3, 784384 - fmv.w.x fa3, a3 - lui a3, 264192 - fmv.w.x fa2, a3 - slli a6, a0, 2 - j MX(rvv_f32_4) -MX(rvv_f32_3): - addi a7, a7, 1 - add a2, a2, a6 - beq a7, a0, MX(rvv_f32_13) -MX(rvv_f32_4): - fcvt.s.wu fa1, a7 - mv t0, a0 - j MX(rvv_f32_6) -MX(rvv_f32_5): - slli a3, t0, 2 - add a3, a3, a2 - vsetvli zero, zero, e32, MX(), ta, ma - vse32.v v8, (a3) - beqz t0, MX(rvv_f32_3) -MX(rvv_f32_6): - vsetvli t1, t0, e32, MX(), ta, ma - sub t0, t0, t1 - vmset.m v0 - vmv.v.i v8, 0 - viota.m v10, v0 - vadd.vx v10, v10, t0 - vfcvt.f.xu.v v10, v10 - vfmv.v.f v12, fa1 - vfmul.vf v10, v10, fa5 - vfadd.vf v10, v10, fa4 - vfmul.vf v12, v12, fa5 - vfadd.vf v12, v12, fa3 - vmv.v.i v18, 0 - li a3, 1 - mv a5, a1 - vmv.v.i v14, 0 - vmv.v.i v16, 0 - vmv.v.i v20, 0 -MX(rvv_f32_7): -#if HAS_RVV_1_0 - vsetvli zero, t1, e8, MXf4(), ta, ma -#else - vsetvli zero, t1, e8, m1, ta, ma -#endif - vfirst.m a4, v0 - bltz a4, MX(rvv_f32_5) - vsetvli zero, zero, e32, MX(), ta, ma - vfadd.vv v22, v16, v20 - vmflt.vf v0, v22, fa2 - vfsub.vv v16, v16, v20 - vfadd.vv v18, v18, v18 - vfadd.vv v22, v16, v10 - vfmadd.vv v14, v18, v12 - vfmul.vv v16, v22, v22 - vfmul.vv v20, v14, v14 - vmerge.vxm v8, v8, a3, v0 - addi a5, a5, -1 - addi a3, a3, 1 - vmv.v.v v18, v22 - bnez a5, MX(rvv_f32_7) - j MX(rvv_f32_5) -MX(rvv_f32_9): - slli a3, a0, 2 -MX(rvv_f32_10): - mv a4, a0 -MX(rvv_f32_11): - vsetvli a5, a4, e32, MX(), ta, ma - sub a4, a4, a5 - vmv.v.i v8, 0 - slli a5, a4, 2 - add a5, a5, a2 - vse32.v v8, (a5) - bnez a4, MX(rvv_f32_11) - addi a1, a1, 1 - add a2, a2, a3 - bne a1, a0, MX(rvv_f32_10) -MX(rvv_f32_13): - ret - -#endif - -#if MX_N == 2 && HAS_E64 - -.global MX(mandelbrot_rvv_f64_) # generated by clang -MX(rvv_f64_m1p5): - .quad 0xbff8000000000000 # double -1.5 -MX(rvv_f64_m1): - .quad 0xbff0000000000000 # double -1 -MX(rvv_f64_p4): - .quad 0x4010000000000000 # double 4 -MX(mandelbrot_rvv_f64_): - beqz a0, MX(rvv_f64_13) - beqz a1, MX(rvv_f64_9) - li a7, 0 - fcvt.s.wu fa2, a0 - lui a3, 262144 - fmv.w.x fa1, a3 - la a3, MX(rvv_f64_m1p5) - fld fa5, (a3) - la a3, MX(rvv_f64_m1) - fld fa4, (a3) - la a3, MX(rvv_f64_p4) - fld fa3, (a3) - fdiv.s fa2, fa1, fa2 - fcvt.d.s fa2, fa2 - slli a6, a0, 2 - j MX(rvv_f64_4) -MX(rvv_f64_3): - addi a7, a7, 1 - add a2, a2, a6 - beq a7, a0, MX(rvv_f64_13) -MX(rvv_f64_4): - fcvt.d.wu fa1, a7 - mv t0, a0 - j MX(rvv_f64_6) -MX(rvv_f64_5): - slli a3, t0, 2 - add a3, a3, a2 - vsetvli zero, zero, e32, m1, ta, ma - vse32.v v8, (a3) - beqz t0, MX(rvv_f64_3) -MX(rvv_f64_6): - vsetvli a3, t0, e32, m1, ta, ma - sub t0, t0, a3 - vmset.m v0 - vmv.v.i v8, 0 - vsetvli zero, zero, e64, m2, ta, ma - viota.m v10, v0 - vadd.vx v10, 
v10, t0 - vfcvt.f.xu.v v10, v10 - vfmv.v.f v12, fa1 - vfmul.vf v10, v10, fa2 - vfadd.vf v10, v10, fa5 - vfmul.vf v12, v12, fa2 - vfadd.vf v12, v12, fa4 - vmv.v.i v18, 0 - li a4, 1 - mv a3, a1 - vmv.v.i v14, 0 - vmv.v.i v16, 0 - vmv.v.i v20, 0 -MX(rvv_f64_7): -#if HAS_RVV_1_0 - vsetvli zero, zero, e8, MXf8(), ta, ma -#else - vsetvli zero, t1, e8, m1, ta, ma -#endif - vfirst.m a5, v0 - bltz a5, MX(rvv_f64_5) - vsetvli zero, zero, e64, m2, ta, ma - vfadd.vv v22, v16, v20 - vmflt.vf v0, v22, fa3 - vfsub.vv v16, v16, v20 - vfadd.vv v18, v18, v18 - vfadd.vv v22, v16, v10 - vfmadd.vv v14, v18, v12 - vfmul.vv v16, v22, v22 - vfmul.vv v20, v14, v14 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vxm v8, v8, a4, v0 - addi a3, a3, -1 - addi a4, a4, 1 -#if HAS_RVV_1_0 - vmv2r.v v18, v22 -#else - vsetvli zero, zero, e32, m2 - vmv.v.v v18, v22 -#endif - bnez a3, MX(rvv_f64_7) - j MX(rvv_f64_5) -MX(rvv_f64_9): - slli a3, a0, 2 -MX(rvv_f64_10): - mv a4, a0 -MX(rvv_f64_11): - vsetvli a5, a4, e32, m1, ta, ma - sub a4, a4, a5 - vmv.v.i v8, 0 - slli a5, a4, 2 - add a5, a5, a2 - vse32.v v8, (a5) - bnez a4, MX(rvv_f64_11) - addi a1, a1, 1 - add a2, a2, a3 - bne a1, a0, MX(rvv_f64_10) -MX(rvv_f64_13): - ret - -#endif - - diff --git a/tests/rvv_bench/mandelbrot/mandelbrot.c b/tests/rvv_bench/mandelbrot/mandelbrot.c deleted file mode 100644 index f182eba0f..000000000 --- a/tests/rvv_bench/mandelbrot/mandelbrot.c +++ /dev/null @@ -1,94 +0,0 @@ -#include "bench.h" - -void -mandelbrot_scalar_f32(size_t width, size_t maxIter, uint32_t *res) -{ - for (size_t y = 0; y < width; ++y) - for (size_t x = 0; x < width; ++x) { - float cx = x * 2.0f / width - 1.5; - float cy = y * 2.0f / width - 1; - size_t iter = 0; - float zx = 0, zy = 0, zxS = 0, zyS = 0; - - BENCH_VOLATILE_REG(cy); - while (zxS + zyS <= 4 && iter < maxIter) { - zxS = zxS - zyS + cx; - zy = 2 * zx * zy + cy; - zx = zxS; - BENCH_VOLATILE_REG(zx); - zxS = zx*zx; - zyS = zy*zy; - ++iter; - BENCH_CLOBBER(); - } - *res++ = iter; - } -} - -#if __riscv_xlen >= 64 -void -mandelbrot_scalar_f64(size_t width, size_t maxIter, uint32_t *res) -{ - for (size_t y = 0; y < width; ++y) - for (size_t x = 0; x < width; ++x) { - double cx = x * 2.0 / width - 1.5; - double cy = y * 2.0 / width - 1; - size_t iter = 0; - double zx = 0, zy = 0, zxS = 0, zyS = 0; - - BENCH_VOLATILE_REG(cy); - while (zxS + zyS <= 4 && iter < maxIter) { - zxS = zxS - zyS + cx; - zy = 2 * zx * zy + cy; - zx = zxS; - BENCH_VOLATILE_REG(zx); - zxS = zx*zx; - zyS = zy*zy; - ++iter; - } - *res++ = iter; - } -} -#endif - -#if HAS_F16 -# define IMPLS_F16(f) f(rvv_f16_m1) f(rvv_f16_m2) -#else -# define IMPLS_F16(f) -#endif - -#define IMPLS(f) \ - f(rvv_f32_m1) \ - f(scalar_f32) \ - IF64(f(scalar_f64)) \ - IMPLS_F16(f) \ - f(rvv_f32_m2) \ - IF64(f(rvv_f64_m2)) \ - -typedef void Func(size_t width, size_t maxIter, uint32_t *res); - -#define DECLARE(f) extern Func mandelbrot_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &mandelbrot_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint32_t *dest; -void init(void) { memset(mem, 0, MAX_MEM); dest = (uint32_t*)mem; } - -/* disabled, because of rounding errors, please independently verify */ -ux checksum(size_t n) { return 0; } - -BENCH(base) { - n = usqrt(n); - TIME f(n, mandelbrot_ITER, dest); -} BENCH_END - -Bench benches[] = { - { - SCALE_mandelbrot(MAX_MEM / 4), - "mandelbrot "STR(mandelbrot_ITER), - bench_base - }, -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/memcpy/memcpy.S b/tests/rvv_bench/memcpy/memcpy.S deleted file mode 100644 index 
6511a0493..000000000 --- a/tests/rvv_bench/memcpy/memcpy.S +++ /dev/null @@ -1,153 +0,0 @@ -#if 0 -void *memcpy_rvv(void *restrict dest, void const *restrict src, size_t n) { - unsigned char *d = dest; - unsigned char const *s = src; - for (size_t vl; n > 0; n -= vl, s += vl, d += vl) { - vl = __riscv_vsetvl_e8m8(n); - vuint8m8_t vec_src = __riscv_vle8_v_u8m8(s, vl); - __riscv_vse8_v_u8m8(d, vec_src, vl); - } - return dest; -} -#endif - - -#ifdef MX - -# a0 = dest, a1 = src, a2 = len -.global MX(memcpy_rvv_) -MX(memcpy_rvv_): - mv a3, a0 -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 - bnez a2, 1b - ret - -.global MX(memcpy_rvv_align_dest_) -MX(memcpy_rvv_align_dest_): - mv a3, a0 -#if HAS_RVV_1_0 - csrr t0, vlenb -#else - vsetvli t0, zero, e8, m1, ta, ma # vlenb -#endif - bltu a2, t0, 2f # len < vlenb - # align dest to vlenb - sub t1, zero, a0 - addi t2, t0, -1 - and t1, t1, t2 #align = (-dest) & (vlenb-1) - vsetvli t0, t1, e8, MX(), ta, ma -1: - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - -.global MX(memcpy_rvv_align_src_) -MX(memcpy_rvv_align_src_): - mv a3, a0 -#if HAS_RVV_1_0 - csrr t0, vlenb -#else - vsetvli t0, zero, e8, m1, ta, ma # vlen -#endif - bltu a2, t0, 2f # len < vlen - # align src to vlen - sub t1, zero, a1 - addi t2, t0, -1 - and t1, t1, t2 # align = (-src) & (vlen-1) - vsetvli t0, t1, e8, MX(), ta, ma -1: - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - -# combination of memcpy_rvv_align_dest and memcpy_rvv -.global MX(memcpy_rvv_align_dest_hybrid_) -MX(memcpy_rvv_align_dest_hybrid_): - mv a3, a0 -#if HAS_RVV_1_0 - csrr t0, vlenb -#else - vsetvli t0, zero, e8, m1, ta, ma # vlen -#endif - slli t1, t0, 8 # skip costly division for more values - bltu a2, t1, 2f # len < vlen - sub t1, zero, a0 - addi t2, t0, -1 - and t1, t1, t2 # align = (-dest) & (vlen-1) - vsetvli t0, t1, e8, MX(), ta, ma # align dest to vlen -1: - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - - -.global MX(memcpy_rvv_tail_) -MX(memcpy_rvv_tail_): - vsetvli t0, a2, e8, MX(), ta, ma - remu a3, a2, t0 # tail = n % vlenb - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n - mv a2, a0 # n = dest -1: - vle8.v v8, (a1) - add a1, a1, t0 # src += vlenb - vse8.v v8, (a2) - add a2, a2, t0 # dest += vlenb - bltu a2, a4, 1b # dest < end - # copy tail - vsetvli zero, a3, e8, MX(), ta, ma - vle8.v v8, (a1) - vse8.v v8, (a2) - ret - -# this is supposed to test how well the implementation handles -# operations with an vl smaller than VLMAX -.global MX(memcpy_rvv_128_) -MX(memcpy_rvv_128_): - li t0, 128/8 - bgt a2, t0, 1f - mv t0, a2 -1: - vsetvli t0, t0, e8, MX(), ta, ma - remu a3, a2, t0 # tail = n % vlenb - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n - mv a2, a0 # n = dest -1: - vle8.v v8, (a1) - add a1, a1, t0 # src += vlenb - vse8.v v8, (a2) - add a2, a2, t0 # dest += vlenb - bltu a2, a4, 1b # dest < end - # copy tail - vsetvli zero, a3, e8, MX(), ta, ma - vle8.v v8, (a1) - vse8.v v8, (a2) - ret - -#endif - diff --git a/tests/rvv_bench/memcpy/memcpy.c b/tests/rvv_bench/memcpy/memcpy.c deleted file mode 100644 index 60a977c71..000000000 --- a/tests/rvv_bench/memcpy/memcpy.c +++ /dev/null @@ -1,197 +0,0 @@ -#include 
"bench.h" - -void * -memcpy_scalar(void *restrict dest, void const *restrict src, size_t n) -{ - unsigned char *d = dest; - unsigned char const *s = src; - while (n--) *d++ = *s++, BENCH_CLOBBER(); - return dest; -} - -void * -memcpy_scalar_autovec(void *restrict dest, void const *restrict src, size_t n) -{ - unsigned char *d = dest; - unsigned char const *s = src; - while (n--) *d++ = *s++; - return dest; -} - -/* https://git.musl-libc.org/cgit/musl/tree/src/string/memcpy.c */ -void * -memcpy_musl(void *restrict dest, void const *restrict src, size_t n) -{ - unsigned char *d = dest; - unsigned char const *s = src; - -#ifdef __GNUC__ - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define LS >> -#define RS << -#else -#define LS << -#define RS >> -#endif - - typedef uint32_t __attribute__((__may_alias__)) u32; - uint32_t w, x; - - for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++; - - if ((uintptr_t)d % 4 == 0) { - for (; n>=16; s+=16, d+=16, n-=16) { - *(u32 *)(d+0) = *(u32 *)(s+0); - *(u32 *)(d+4) = *(u32 *)(s+4); - *(u32 *)(d+8) = *(u32 *)(s+8); - *(u32 *)(d+12) = *(u32 *)(s+12); - } - if (n&8) { - *(u32 *)(d+0) = *(u32 *)(s+0); - *(u32 *)(d+4) = *(u32 *)(s+4); - d += 8; s += 8; - } - if (n&4) { - *(u32 *)(d+0) = *(u32 *)(s+0); - d += 4; s += 4; - } - if (n&2) { - *d++ = *s++; *d++ = *s++; - } - if (n&1) { - *d = *s; - } - return dest; - } - - if (n >= 32) switch ((uintptr_t)d % 4) { - case 1: - w = *(u32 *)s; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - n -= 3; - for (; n>=17; s+=16, d+=16, n-=16) { - x = *(u32 *)(s+1); - *(u32 *)(d+0) = (w LS 24) | (x RS 8); - w = *(u32 *)(s+5); - *(u32 *)(d+4) = (x LS 24) | (w RS 8); - x = *(u32 *)(s+9); - *(u32 *)(d+8) = (w LS 24) | (x RS 8); - w = *(u32 *)(s+13); - *(u32 *)(d+12) = (x LS 24) | (w RS 8); - } - break; - case 2: - w = *(u32 *)s; - *d++ = *s++; - *d++ = *s++; - n -= 2; - for (; n>=18; s+=16, d+=16, n-=16) { - x = *(u32 *)(s+2); - *(u32 *)(d+0) = (w LS 16) | (x RS 16); - w = *(u32 *)(s+6); - *(u32 *)(d+4) = (x LS 16) | (w RS 16); - x = *(u32 *)(s+10); - *(u32 *)(d+8) = (w LS 16) | (x RS 16); - w = *(u32 *)(s+14); - *(u32 *)(d+12) = (x LS 16) | (w RS 16); - } - break; - case 3: - w = *(u32 *)s; - *d++ = *s++; - n -= 1; - for (; n>=19; s+=16, d+=16, n-=16) { - x = *(u32 *)(s+3); - *(u32 *)(d+0) = (w LS 8) | (x RS 24); - w = *(u32 *)(s+7); - *(u32 *)(d+4) = (x LS 8) | (w RS 24); - x = *(u32 *)(s+11); - *(u32 *)(d+8) = (w LS 8) | (x RS 24); - w = *(u32 *)(s+15); - *(u32 *)(d+12) = (x LS 8) | (w RS 24); - } - break; - } - if (n&16) { - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - } - if (n&8) { - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - } - if (n&4) { - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - } - if (n&2) { - *d++ = *s++; *d++ = *s++; - } - if (n&1) { - *d = *s; - } - return dest; -#endif - - while (n--) { *d++ = *s++; BENCH_CLOBBER(); } - return dest; -} - -#define memcpy_libc memcpy - -#define IMPLS(f) \ - IFHOSTED(f(libc)) \ - f(musl) \ - f(scalar) \ - f(scalar_autovec) \ - MX(f, rvv) \ - MX(f, rvv_align_dest) \ - MX(f, rvv_align_src) \ - MX(f, rvv_align_dest_hybrid) \ - MX(f, rvv_tail) \ - MX(f, rvv_128) \ - -typedef void *Func(void *restrict dest, void const *restrict src, size_t n); - -#define DECLARE(f) extern Func memcpy_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &memcpy_##f }, -Impl 
impls[] = { IMPLS(EXTRACT) }; - -uint8_t *dest, *src; -ux last; - -void init(void) { } - -ux checksum(size_t n) { - ux sum = last; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t dOff, size_t sOff) { - dest = mem + dOff; src = dest + MAX_MEM/2 + sOff + 9; - memset(dest, 0, n+9); -} - -BENCH(base) { - common(n, urand() & 255, urand() & 255); - TIME last = (uintptr_t)f(dest, src, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0, 0); - TIME last = (uintptr_t)f(dest, src, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/2 - 521, "memcpy", bench_base }, - { MAX_MEM/2 - 521, "memcpy aligned", bench_aligned} -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/memset/memset.S b/tests/rvv_bench/memset/memset.S deleted file mode 100644 index 3d00eae62..000000000 --- a/tests/rvv_bench/memset/memset.S +++ /dev/null @@ -1,96 +0,0 @@ -#if 0 -void *memset(void *dst, int n, size_t len) { - unsigned char *d = dst; - vuint8m8_t v = __riscv_vmv_v_x_u8m8((uint8_t)n, __riscv_vsetvlmax_e8m8()); - for (size_t vl; len > 0; len -= vl, d += vl) { - vl = __riscv_vsetvl_e8m8(len); - __riscv_vse8_v_u8m8(d, v, vl); - } - return dst; -} -#endif - -#ifdef MX - -.global MX(memset_rvv_) -MX(memset_rvv_): - vsetvli a3, zero, e8, MX(), ta, ma - vmv.v.x v8, a1 - mv a1, a0 -1: - vsetvli a3, a2, e8, MX(), ta, ma - vse8.v v8, (a1) - sub a2, a2, a3 - add a1, a1, a3 - bnez a2, 1b - ret - - -.global MX(memset_rvv_align_) -MX(memset_rvv_align_): - vsetvli t0, zero, e8, m1, ta, ma # vlen - vmv.v.x v8, a1 - mv a1, a0 - vsetvli t0, zero, e8, MX(), ta, ma # vlen - bltu a2, t0, 2f # len < vlen - # align dest to vlen - sub t1, zero, a0 - remu t1, t1, t0 # align = (-dest) % vlen - vsetvli t0, t1, e8, MX(), ta, ma -1: - vse8.v v8, (a1) - sub a2, a2, t0 - add a1, a1, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - -.global MX(memset_rvv_tail_) -MX(memset_rvv_tail_): - vsetvli t0, a2, e8, MX(), ta, ma - vmv.v.x v8, a1 - remu a3, a2, t0 # tail = n % vlenb - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n - mv a2, a0 # n = dest -1: - vse8.v v8, (a2) - add a2, a2, t0 # dest += vlenb - bltu a2, a4, 1b # dest < end - # handle tail - vsetvli zero, a3, e8, MX(), ta, ma - vse8.v v8, (a2) - ret - -.global MX(memset_rvv_tail_4x_) -MX(memset_rvv_tail_4x_): - vsetvli t0, a2, e8, MX(), ta, ma - vmv.v.x v8, a1 - slli t1, t0, 2 - mv a5, a0 - mv a3, a2 - bltu a2, t1, 2f - remu a3, a2, t1 # tail = n % (vlenb*4) - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n -1: - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - bltu a5, a4, 1b # dest < end - # handle tail -2: - vsetvli a4, a3, e8, MX(), ta, ma - vse8.v v8, (a5) - sub a3, a3, a4 - add a5, a5, a4 - bnez a3, 2b - ret - -#endif diff --git a/tests/rvv_bench/memset/memset.c b/tests/rvv_bench/memset/memset.c deleted file mode 100644 index 9b2f7c463..000000000 --- a/tests/rvv_bench/memset/memset.c +++ /dev/null @@ -1,163 +0,0 @@ -#include "bench.h" - -void * -memset_scalar(void *dest, int c, size_t n) -{ - unsigned char *d = dest; - while (n--) *d++ = c, BENCH_CLOBBER(); - return dest; -} - -void * -memset_scalar_autovec(void *dest, int c, size_t n) -{ - unsigned char *d = dest; - while (n--) *d++ = c; - return dest; -} - -/* https://git.musl-libc.org/cgit/musl/tree/src/string/memset.c */ -#if __riscv_xlen >= 64 -void * -memset_musl(void 
*dest, int c, size_t n) -{ - unsigned char *s = dest; - size_t k; - - /* Fill head and tail with minimal branching. Each - * conditional ensures that all the subsequently used - * offsets are well-defined and in the dest region. */ - - if (!n) return dest; - s[0] = c; - s[n-1] = c; - if (n <= 2) return dest; - s[1] = c; - s[2] = c; - s[n-2] = c; - s[n-3] = c; - if (n <= 6) return dest; - s[3] = c; - s[n-4] = c; - if (n <= 8) return dest; - - /* Advance pointer to align it at a 4-byte boundary, - * and truncate n to a multiple of 4. The previous code - * already took care of any head/tail that get cut off - * by the alignment. */ - - k = -(uintptr_t)s & 3; - s += k; - n -= k; - n &= -4; - -#ifdef __GNUC__ - typedef uint32_t __attribute__((__may_alias__)) u32; - typedef uint64_t __attribute__((__may_alias__)) u64; - - u32 c32 = ((u32)-1)/255 * (unsigned char)c; - - /* In preparation to copy 32 bytes at a time, aligned on - * an 8-byte bounary, fill head/tail up to 28 bytes each. - * As in the initial byte-based head/tail fill, each - * conditional below ensures that the subsequent offsets - * are valid (e.g. !(n<=24) implies n>=28). */ - - *(u32 *)(s+0) = c32; - *(u32 *)(s+n-4) = c32; - if (n <= 8) return dest; - *(u32 *)(s+4) = c32; - *(u32 *)(s+8) = c32; - *(u32 *)(s+n-12) = c32; - *(u32 *)(s+n-8) = c32; - if (n <= 24) return dest; - *(u32 *)(s+12) = c32; - *(u32 *)(s+16) = c32; - *(u32 *)(s+20) = c32; - *(u32 *)(s+24) = c32; - *(u32 *)(s+n-28) = c32; - *(u32 *)(s+n-24) = c32; - *(u32 *)(s+n-20) = c32; - *(u32 *)(s+n-16) = c32; - - /* Align to a multiple of 8 so we can fill 64 bits at a time, - * and avoid writing the same bytes twice as much as is - * practical without introducing additional branching. */ - - k = 24 + ((uintptr_t)s & 4); - s += k; - n -= k; - - /* If this loop is reached, 28 tail bytes have already been - * filled, so any remainder when n drops below 32 can be - * safely ignored. */ - - u64 c64 = c32 | ((u64)c32 << 32); - for (; n >= 32; n-=32, s+=32) { - *(u64 *)(s+0) = c64; - *(u64 *)(s+8) = c64; - *(u64 *)(s+16) = c64; - *(u64 *)(s+24) = c64; - } -#else - /* Pure C fallback with no aliasing violations. 
*/ - while (n--) *s++ = c; -#endif - - return dest; -} -#endif - -#define memset_libc memset - -#define IMPLS(f) \ - IFHOSTED(f(libc)) \ - IF64(f(musl)) \ - f(scalar) \ - f(scalar_autovec) \ - MX(f, rvv) \ - MX(f, rvv_align) \ - MX(f, rvv_tail) \ - MX(f, rvv_tail_4x) \ - -typedef void *Func(void *dest, int c, size_t n); - -#define DECLARE(f) extern Func memset_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &memset_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint8_t *dest; -ux last; -char c; - -void init(void) { c = urand(); } - -ux checksum(size_t n) { - ux sum = last; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t off) { - dest = mem + off; - memset(dest, c+3, n+9); -} - -BENCH(base) { - common(n, urand() & 511); - TIME last = (uintptr_t)f(dest, c, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0); - TIME last = (uintptr_t)f(dest, c, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM - 521, "memset", bench_base }, - { MAX_MEM - 521, "memset aligned", bench_aligned} -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/mergelines/mergelines.S b/tests/rvv_bench/mergelines/mergelines.S deleted file mode 100644 index 051a0d7de..000000000 --- a/tests/rvv_bench/mergelines/mergelines.S +++ /dev/null @@ -1,179 +0,0 @@ -#if 0 -size_t -mergelines_rvv(char *str, size_t len) -{ - uint8_t *dest = (uint8_t*)str; - uint8_t *src = (uint8_t*)str; - char last = 0; - - vuint8m8_t v, u, d; - vbool1_t m; - - for (size_t vl, VL; len > 1; ) { - VL = vl = __riscv_vsetvl_e8m8(len); - - char next = len > vl ? src[vl] : 0; - v = __riscv_vle8_v_u8m8(src, vl); - u = __riscv_vslide1up_vx_u8m8(v, last, vl); - d = __riscv_vslide1down_vx_u8m8(v, next, vl); - - m = __riscv_vmor_mm_b1(__riscv_vmsne_vx_u8m8_b1(u, '\\', vl), __riscv_vmsne_vx_u8m8_b1(v, '\n', vl), vl); - #if DO_SKIP - if (likely(__riscv_vcpop_m_b1(m, vl) == vl && next != '\n')) - goto skip; - #endif - m = __riscv_vmand_mm_b1( - m, - __riscv_vmor_mm_b1(__riscv_vmsne_vx_u8m8_b1(v, '\\', vl), __riscv_vmsne_vx_u8m8_b1(d, '\n', vl), vl), - vl); - - v = __riscv_vcompress_vm_u8m8(v, m, vl); - vl = __riscv_vcpop_m_b1(m, vl); - skip: - __riscv_vse8_v_u8m8(dest, v, vl); - dest += vl; src += VL; len -= VL; - last = src[-1]; - } - - if (len > 0 && !(last == '\\' && *src == '\n')) *dest++ = *src++; - return (dest - (uint8_t*)str); -} -#endif - -#ifdef MX - -.global MX(mergelines_rvv_) # generated by clang -MX(mergelines_rvv_): - li a2, 2 - bltu a1, a2, MX(rvv_6) - li t0, 0 - li a7, 92 - li a6, 1 - mv a2, a0 - mv a4, a0 - j MX(rvv_4) -MX(rvv_2): # in Loop: Header=BB0_4 Depth=1 - add a3, a4, a5 - lbu t1, 0(a3) -MX(rvv_3): # in Loop: Header=BB0_4 Depth=1 - vle8.v v8, (a4) - add a3, a4, a5 - vslide1up.vx v16, v8, t0 - vslide1down.vx v24, v8, t1 - vmsne.vx v0, v16, a7 - vmsne.vi v16, v8, 10 - vmor.mm v16, v0, v16 - vmsne.vx v17, v8, a7 - vmsne.vi v18, v24, 10 - vmor.mm v17, v17, v18 - vmand.mm v16, v16, v17 - vcompress.vm v24, v8, v16 - vcpop.m a4, v16 - vsetvli zero, a4, e8, MX(), ta, ma - vse8.v v24, (a2) - lbu t0, -1(a3) - sub a1, a1, a5 - add a2, a2, a4 - mv a4, a3 - bgeu a6, a1, MX(rvv_8) -MX(rvv_4): # =>This Inner Loop Header: Depth=1 - vsetvli a5, a1, e8, MX(), ta, ma - bltu a5, a1, MX(rvv_2) - li t1, 0 - j MX(rvv_3) -MX(rvv_6): - mv a2, a0 - beqz a1, MX(rvv_10) - lbu a1, 0(a0) - mv a2, a0 - j MX(rvv_11) -MX(rvv_8): - beqz a1, MX(rvv_10) - lbu a1, 0(a3) - xori a3, t0, 92 - xori a4, a1, 10 - or a3, a3, a4 - bnez a3, MX(rvv_11) -MX(rvv_10): - sub a0, a2, a0 - ret -MX(rvv_11): - addi a3, a2, 
1 - sb a1, 0(a2) - sub a0, a3, a0 - ret - - -.global MX(mergelines_rvv_skip_) # generated by clang -MX(mergelines_rvv_skip_): - li a2, 2 - bltu a1, a2, MX(rvv_skip_9) - li a5, 0 - li a6, 92 - li a7, 1 - mv t1, a0 - mv a3, a0 -MX(rvv_skip_2): # =>This Inner Loop Header: Depth=1 - vsetvli a4, a1, e8, MX(), ta, ma - bgeu a4, a1, MX(rvv_skip_4) - add a2, a3, a4 - lbu t0, 0(a2) - j MX(rvv_skip_5) -MX(rvv_skip_4): # in Loop: Header=BB0_2 Depth=1 - li t0, 0 -MX(rvv_skip_5): # in Loop: Header=BB0_2 Depth=1 - vle8.v v8, (a3) - vslide1up.vx v16, v8, a5 - vmsne.vx v24, v16, a6 - vmsne.vi v16, v8, 10 - vmor.mm v16, v24, v16 - vcpop.m a2, v16 - xor a2, a2, a4 - seqz a2, a2 - addi a5, t0, -10 - snez a5, a5 - and a2, a2, a5 - beqz a2, MX(rvv_skip_8) - mv a2, a4 -MX(rvv_skip_7): # in Loop: Header=BB0_2 Depth=1 - add a3, a3, a4 - vsetvli zero, a2, e8, MX(), ta, ma - vse8.v v8, (t1) - lbu a5, -1(a3) - sub a1, a1, a4 - add t1, t1, a2 - bltu a7, a1, MX(rvv_skip_2) - j MX(rvv_skip_11) -MX(rvv_skip_8): # in Loop: Header=BB0_2 Depth=1 - vslide1down.vx v24, v8, t0 - vmsne.vx v17, v8, a6 - vmsne.vi v18, v24, 10 - vmor.mm v17, v17, v18 - vmand.mm v16, v16, v17 - vcompress.vm v24, v8, v16 - vcpop.m a2, v16 - vmv.v.v v8, v24 - j MX(rvv_skip_7) -MX(rvv_skip_9): - mv t1, a0 - beqz a1, MX(rvv_skip_13) - lbu a1, 0(a0) - mv t1, a0 - j MX(rvv_skip_14) -MX(rvv_skip_11): - beqz a1, MX(rvv_skip_13) - lbu a1, 0(a3) - xori a2, a5, 92 - xori a3, a1, 10 - or a2, a2, a3 - bnez a2, MX(rvv_skip_14) -MX(rvv_skip_13): - sub a0, t1, a0 - ret -MX(rvv_skip_14): - addi a2, t1, 1 - sb a1, 0(t1) - sub a0, a2, a0 - ret - -#endif diff --git a/tests/rvv_bench/mergelines/mergelines.c b/tests/rvv_bench/mergelines/mergelines.c deleted file mode 100644 index 2d1d2078d..000000000 --- a/tests/rvv_bench/mergelines/mergelines.c +++ /dev/null @@ -1,75 +0,0 @@ -#include "bench.h" - -size_t -mergelines_scalar(char *str, size_t len) -{ - char *dest = str; - char *src = str; - - while (len > 1) { - if (src[0] == '\\' && src[1] == '\n') - src += 2, len -= 2; - else - *dest++ = *src++, --len; - BENCH_CLOBBER(); - } - if (len > 0) - *dest++ = *src++; - return dest - str; -} - -#define IMPLS(f) \ - MX(f, rvv) \ - f(scalar) \ - MX(f, rvv_skip) \ - -typedef size_t Func(char *buf, size_t len); - -#define DECLARE(f) extern Func mergelines_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &mergelines_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -char *str; -ux last; - -void init(void) { } -ux checksum(size_t n) { return last; } - -void common(size_t n, char const *chars, size_t nChars) { - str = (char*)mem + (urand() & 255); - for (size_t i = 0; i < n; ++i) - str[i] = chars[urand() % nChars]; -} - -BENCH(2_3) { - common(n, "\\\na", 3); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -BENCH(2_16) { - common(n, "\\\nabcdefgh", 16); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -BENCH(2_32) { - common(n, "\\\nabcdefgh123456789", 32); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -BENCH(2_256) { - str = (char*)mem + (urand() & 255); - for (size_t i = 0; i < n; ++i) - str[i] = urand() & 0xff; - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -#define COUNT SCALE_mergelines(MAX_MEM) - 256 -Bench benches[] = { - { COUNT, "mergelines 2/3", bench_2_3 }, - { COUNT, "mergelines 2/16", bench_2_16 }, - { COUNT, "mergelines 2/32", bench_2_32 }, - { COUNT, "mergelines 2/256", bench_2_256 } -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/poly1305/poly1305.S b/tests/rvv_bench/poly1305/poly1305.S deleted file mode 100644 index e5b332e02..000000000 --- 
--- a/tests/rvv_bench/poly1305/poly1305.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef MX
-#if __riscv_xlen >= 64
-#include "rvv-chacha-poly/vpoly.s"
-#endif
-#endif
diff --git a/tests/rvv_bench/poly1305/poly1305.c b/tests/rvv_bench/poly1305/poly1305.c
deleted file mode 100644
index 72849ac75..000000000
--- a/tests/rvv_bench/poly1305/poly1305.c
+++ /dev/null
@@ -1,64 +0,0 @@
-#include "bench.h"
-#if __riscv_xlen >= 64
-#include "thirdparty/boring.h"
-
-uint8_t *src;
-uint8_t key[32], sig[16];
-
-extern uint64_t
-vector_poly1305(const uint8_t* in, size_t len,
-    const uint8_t key[32], uint8_t sig[16]);
-
-static void
-poly1305_boring(void const *src, size_t n) {
-    poly1305_state state;
-    boring_poly1305_init(&state, key);
-    boring_poly1305_update(&state, src, n);
-    boring_poly1305_finish(&state, sig);
-}
-
-static void
-poly1305_rvv(void const *src, size_t n) {
-    vector_poly1305(src, n, key, sig);
-}
-
-typedef void *Func(void const *src, size_t n);
-
-Impl impls[] = {
-    { "boring", &poly1305_boring },
-#if HAS_E64
-    { "rvv", &poly1305_rvv },
-#endif
-};
-
-void init(void) {
-    memrand(key, sizeof key);
-    memrand(sig, sizeof sig);
-}
-
-ux checksum(size_t n) {
-    ux sum = 0;
-    for (size_t i = 0; i < ARR_LEN(sig); ++i)
-        sum = uhash(sum) + sig[i];
-    return sum;
-}
-
-BENCH(aligned) {
-    for (size_t i = 0; i < 256; ++i)
-        mem[urand()%n] = urand();
-    n = (15+n) & -16;
-    TIME f(mem, n);
-} BENCH_END
-
-Bench benches[] = {
-    { MAX_MEM, "poly1305 aligned", bench_aligned }
-}; BENCH_MAIN(impls, benches)
-
-
-#include "../thirdparty/boring.c"
-#else
-void init(void) {}
-Impl impls[] = {};
-Bench benches[] = {};
-BENCH_MAIN(impls, benches)
-#endif
diff --git a/tests/rvv_bench/strlen/strlen.S b/tests/rvv_bench/strlen/strlen.S
deleted file mode 100644
index d639e5a80..000000000
--- a/tests/rvv_bench/strlen/strlen.S
+++ /dev/null
@@ -1,91 +0,0 @@
-#if 0
-size_t strlen_rvv(char *src) {
-    size_t vlmax = __riscv_vsetvlmax_e8m8();
-    char *p = src;
-    long first = -1;
-    size_t vl;
-    while (first < 0) {
-        vuint8m8_t v = __riscv_vle8ff_v_u8m8((uint8_t*)p, &vl, vlmax);
-        first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl);
-        p += vl;
-    }
-    p -= vl - first;
-    return (size_t)(p - src);
-}
-
-#define PAGE_SIZE 4096
-size_t strlen_rvv_page_aligned_(char *src) {
-    char *p = src;
-    long first = 0;
-
-    size_t n = 0 - ((uintptr_t)src | -4096);
-    size_t vl;
-    for (; n > 0; n -= vl) {
-        vl = __riscv_vsetvl_e8m8(n);
-        vuint8m8_t v = __riscv_vle8_v_u8m8((uint8_t*)p, vl);
-        first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl);
-        p += vl;
-        if (first >= 0) {
-            goto end;
-        }
-    }
-    vl = __riscv_vsetvlmax_e8m8();
-    do {
-        vuint8m8_t v = __riscv_vle8_v_u8m8((uint8_t*)p, vl);
-        first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl);
-        p += vl;
-    } while (first < 0);
-end:
-    p -= vl - first;
-    return (size_t)(p - src);
-}
-#endif
-
-
-#ifdef MX
-
-.global MX(strlen_rvv_)
-MX(strlen_rvv_):
-    mv a3, a0
-1:
-    vsetvli a1, x0, e8, MX(), ta, ma
-    vle8ff.v v8, (a3)
-    csrr a1, vl
-    vmseq.vi v0, v8, 0
-    vfirst.m a2, v0
-    add a3, a3, a1 # end += vl
-    bltz a2, 1b
-    add a0, a0, a1 # start += vl
-    add a3, a3, a2 # end += idx
-    sub a0, a3, a0 # start - end
-    ret
-
-.global MX(strlen_rvv_page_aligned_) # generated by clang
-MX(strlen_rvv_page_aligned_):
-    lui a1, 1048575
-    or a1, a1, a0
-    neg a4, a1
-    mv a1, a0
-1:
-    vsetvli a2, a4, e8, MX(), ta, ma
-    vle8.v v8, (a1)
-    vmseq.vi v16, v8, 0
-    vfirst.m a3, v16
-    add a1, a1, a2
-    bgez a3, 1f
-    sub a4, a4, a2
-    bnez a4, 1b
-    vsetvli a2, zero, e8, MX(), ta, ma
-2:
-    vle8.v v8, (a1)
-    vmseq.vi v16, v8, 0
-    vfirst.m a3, v16
-    add a1, a1, a2
-    bltz a3, 2b
-1:
-    sub a1, a1, a2
-    sub a0, a3, a0
-    add a0, a0, a1
-    ret
-
-#endif
diff --git a/tests/rvv_bench/strlen/strlen.c b/tests/rvv_bench/strlen/strlen.c
deleted file mode 100644
index 709e84b6f..000000000
--- a/tests/rvv_bench/strlen/strlen.c
+++ /dev/null
@@ -1,76 +0,0 @@
-#include "bench.h"
-
-size_t
-strlen_scalar(char const *s)
-{
-    char const *a = s;
-    while (*s) ++s, BENCH_CLOBBER();
-    return s - a;
-}
-
-size_t
-strlen_scalar_autovec(char const *s)
-{
-    char const *a = s;
-    while (*s) ++s;
-    return s - a;
-}
-
-/* https://git.musl-libc.org/cgit/musl/tree/src/string/strlen.c */
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) (((x)-ONES) & ~(x) & HIGHS)
-size_t
-strlen_musl(char const *s)
-{
-    char const *a = s;
-#ifdef __GNUC__
-    typedef size_t __attribute__((__may_alias__)) word;
-    word const *w;
-    for (; (uintptr_t)s % sizeof *w; s++) if (!*s) return s-a;
-    for (w = (void const*)s; !HASZERO(*w); w++);
-    s = (void const*)w;
-#endif
-    for (; *s; s++);
-    return s-a;
-}
-
-#define strlen_libc strlen
-
-#define IMPLS(f) \
-    f(scalar) \
-    f(scalar_autovec) \
-    IFHOSTED(f(libc)) \
-    f(musl) \
-    MX(f, rvv_page_aligned) \
-    MX(f, rvv) \
-
-
-typedef size_t Func(char const *s);
-
-#define DECLARE(f) extern Func strlen_##f;
-IMPLS(DECLARE)
-
-#define EXTRACT(f) { #f, &strlen_##f },
-Impl impls[] = { IMPLS(EXTRACT) };
-
-ux last;
-
-void init(void) {
-    for (size_t i = 0; i < MAX_MEM; ++i)
-        mem[i] += !mem[i]; // remove null bytes
-}
-
-ux checksum(size_t n) { return last; }
-
-BENCH(base) {
-    char *p = (char*)mem + (urand() % 511);
-    p[n] = 0;
-    TIME last = f(p);
-    p[n] = urand() | 1;
-} BENCH_END
-
-Bench benches[] = {
-    { MAX_MEM - 521, "strlen", bench_base },
-}; BENCH_MAIN(impls, benches)
-
diff --git a/tests/rvv_bench/t1_runtime.patch b/tests/rvv_bench/t1_runtime.patch
new file mode 100644
index 000000000..bf07e1f1a
--- /dev/null
+++ b/tests/rvv_bench/t1_runtime.patch
@@ -0,0 +1,102 @@
+diff --git a/bench/bench.h b/bench/bench.h
+index af1c839..661e8df 100644
+--- a/bench/bench.h
++++ b/bench/bench.h
+@@ -120,45 +120,11 @@ static void
+ bench_run(Bench *benches, size_t nBenches)
+ {
+     for (Bench *b = benches; b != benches + nBenches; ++b) {
+-        print("{\ntitle: \"")(s,b->name)("\",\n");
+-        print("labels: [\"0\",");
+-        for (size_t i = 0; i < b->nImpls; ++i)
+-            print("\"")(s,b->impls[i].name)("\",");
+-        print("],\n");
+-
+         size_t N = b->N;
+-        print("data: [\n[");
+-        for (size_t n = 1; n < N; n = BENCH_NEXT(n))
+-            print(u,n)(",");
+-        print("],\n")(flush,);
+
+         for (Impl *i = b->impls; i != b->impls + b->nImpls; ++i) {
+-            print("[");
+-            for (size_t n = 1; n < N; n = BENCH_NEXT(n)) {
+-                ux si = 0, s0 = 0;
+-
+-#if VALIDATE
+-                if (i != b->impls) {
+-                    URand seed = randState;
+-                    (void)b->func(i->func, n);
+-                    si = checksum(n);
+-
+-                    randState = seed;
+-                    (void)b->func(b->impls[0].func, n);
+-                    s0 = checksum(n);
+-                }
+-
+-                if (si != s0) {
+-                    print("ERROR: ")(s,i->name)(" in ")(s,b->name)(" at ")(u,n)(flush,);
+-                    exit(EXIT_FAILURE);
+-                }
+-#endif
+-
+-                print(f,bench_time(n, *i, *b))(",")(flush,);
+-            }
+-            print("],\n")(flush,);
++            bench_time(N, *i, *b);
+         }
+-        print("]\n},\n");
+     }
+ }
+
+diff --git a/bench/config.h b/bench/config.h
+index 0078049..a7a8cf6 100644
+--- a/bench/config.h
++++ b/bench/config.h
+@@ -3,14 +3,14 @@
+ #define HAS_F16 0
+
+ /* the maximum number of bytes to allocate, minimum of 4096 */
+-#define MAX_MEM (1024*1024*32)
++#define MAX_MEM (1024*4)
+ /* the byte count for the next run */
+ #define NEXT(c) (c + c/7 + 3)
+
+ /* minimum number of repeats, to sample median from */
+-#define MIN_REPEATS 10
++#define MIN_REPEATS 1
+ /* maxium number of repeats, executed until more than STOP_TIME has elapsed */
+-#define MAX_REPEATS 64
++#define MAX_REPEATS 1
+
+ /* stop repeats early afer this many cycles have elapsed */
+ #define STOP_CYCLES (1024*1024*500)
+diff --git a/nolibc.h b/nolibc.h
+index 94d4235..06f2c0f 100644
+--- a/nolibc.h
++++ b/nolibc.h
+@@ -64,7 +64,7 @@ memread(void *ptr, size_t len)
+     return fread(ptr, 1, len, stdin);
+ }
+ #ifndef ENABLE_RDCYCLE_HACK
+-int main(void) {
++int test(void) {
+     int x = nolibc_main();
+     print_flush();
+     exit(x);
+@@ -158,13 +158,8 @@ void _start(void) {
+ static inline ux
+ rv_cycles(void)
+ {
+-    ux cycle;
+-#ifdef READ_MCYCLE
+-    __asm volatile ("csrr %0, mcycle" : "=r"(cycle));
+-#else
+-    __asm volatile ("csrr %0, cycle" : "=r"(cycle));
+-#endif
+-    return cycle;
++    // TODO: support cycle
++    return 0;
+ }
+
+
diff --git a/tests/rvv_bench/utf8_count/utf8_count.S b/tests/rvv_bench/utf8_count/utf8_count.S
deleted file mode 100644
index 41a079693..000000000
--- a/tests/rvv_bench/utf8_count/utf8_count.S
+++ /dev/null
@@ -1,213 +0,0 @@
-#if 0
-size_t utf8_count_rvv(char const *buf, size_t len) {
-    size_t sum = 0;
-    for (size_t vl; len > 0; len -= vl, buf += vl) {
-        vl = __riscv_vsetvl_e8m8(len);
-        vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
-        vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
-        sum += __riscv_vcpop_m_b1(mask, vl);
-    }
-    return sum;
-}
-#endif
-
-#ifdef MX
-
-.global MX(utf8_count_rvv_)
-MX(utf8_count_rvv_):
-    li a2, 0
-    li a3, -65
-1:
-    vsetvli a4, a1, e8, MX(), ta, ma
-    vle8.v v8, (a0)
-    vmsgt.vx v16, v8, a3
-    vcpop.m a5, v16
-    add a2, a2, a5
-    sub a1, a1, a4
-    add a0, a0, a4
-    bnez a1, 1b
-    mv a0, a2
-    ret
-
-.global MX(utf8_count_rvv_align_)
-MX(utf8_count_rvv_align_):
-    mv a2, a0
-    li a0, 0
-    li a3, -65
-    vsetvli t0, zero, e8, MX(), ta, ma # vlen
-    bltu a1, t0, 2f # len < vlen
-    # align dest to vlen
-    sub t1, zero, a2
-    remu t1, t1, t0 # align = (-dest) % vlen
-    vsetvli t0, t1, e8, MX(), ta, ma
-1:
-    vle8.v v8,(a2)
-    vmsgt.vx v16, v8, a3
-    vcpop.m a4, v16
-    add a0, a0, a4
-    sub a1, a1, t0
-    add a2, a2, t0
-2:
-    vsetvli t0, a1, e8, MX(), ta, ma
-    bnez a1, 1b
-    ret
-
-.global MX(utf8_count_rvv_tail_)
-MX(utf8_count_rvv_tail_):
-    vsetvli t0, a1, e8, MX(), ta, ma
-    remu a2, a1, t0 # tail = n % vlenb
-    sub a1, a1, a2 # n -= tail
-    add a3, a0, a1 # end = dest + n
-    mv a1, a0 # n = dest
-    li a0, 0
-    li t1, -65
-1:
-    vle8.v v8, (a1)
-    vmsgt.vx v16, v8, t1
-    vcpop.m t2, v16
-    add a0, a0, t2
-    add a1, a1, t0 # src += vlenb
-    bltu a1, a3, 1b # dest < end
-    # copy tail
-    vsetvli zero, a2, e8, MX(), ta, ma
-    vle8.v v8, (a1)
-    vmsgt.vx v16, v8, t1
-    vcpop.m t2, v16
-    add a0, a0, t2
-    ret
-
-# this is supposed to test how well the implementation handles
-# operations with an vl smaller than VLMAX
-.global MX(utf8_count_rvv_128_)
-MX(utf8_count_rvv_128_):
-    li t0, 128/8
-    bgt a1, t0, 1f
-    mv t0, a1
-1:
-    vsetvli t0, t0, e8, MX(), ta, ma
-    remu a2, a1, t0 # tail = n % vlenb
-    sub a1, a1, a2 # n -= tail
-    add a3, a0, a1 # end = dest + n
-    mv a1, a0 # n = dest
-    li a0, 0
-    li t1, -65
-1:
-    vle8.v v8, (a1)
-    vmsgt.vx v16, v8, t1
-    vcpop.m t2, v16
-    add a0, a0, t2
-    add a1, a1, t0 # src += vlenb
-    bltu a1, a3, 1b # dest < end
-    # copy tail
-    vsetvli zero, a2, e8, MX(), ta, ma
-    vle8.v v8, (a1)
-    vmsgt.vx v16, v8, t1
-    vcpop.m t2, v16
-    add a0, a0, t2
-    ret
-
-
-.global MX(utf8_count_rvv_4x_)
-MX(utf8_count_rvv_4x_):
-    mv a2, a0
-    li a0, 0
-    li a6, -65
-1:
-    vsetvli a4, a1, e8, MX(), ta, ma
-    vle8.v v8, (a2)
-    vmsgt.vx v16, v8, a6
-    vcpop.m a7, v16
-    sub a1, a1, a4
-    add a2, a2, a4
-    vsetvli a4, a1, e8, MX(), ta, ma
-    vle8.v v8, (a2)
-    vmsgt.vx v16, v8, a6
-    vcpop.m a3, v16
-    sub a1, a1, a4
-    add a2, a2, a4
-    vsetvli a4, a1, e8, MX(), ta, ma
-    vle8.v v8, (a2)
-    vmsgt.vx v16, v8, a6
-    vcpop.m a5, v16
-    sub a1, a1, a4
-    add a2, a2, a4
-    vsetvli a4, a1, e8, MX(), ta, ma
-    vle8.v v8, (a2)
-    add a0, a0, a7
-    add a0, a0, a3
-    add a0, a0, a5
-    vmsgt.vx v16, v8, a6
-    vcpop.m a3, v16
-    add a0, a0, a3
-    sub a1, a1, a4
-    add a2, a2, a4
-    bnez a1, 1b
-    ret
-
-// gcc generated from unrolled intrinsics implementation:
-// https://godbolt.org/z/q75c6r3Ta
-.global MX(utf8_count_rvv_4x_tail_)
-MX(utf8_count_rvv_4x_tail_):
-    vsetvli a5, zero, e8, MX(), ta, ma
-    slli t3, a5, 2
-    add a1, a0, a1
-    add a2, a0, t3
-    mv a4, a0
-    bltu a1, a2, 5f
-    slli t4, a5, 1
-    add t5, t4, a5
-    li a0, 0
-    li a6, -65
-1:
-    add a3, a5, a4
-    vsetvli zero, zero, e8, MX(), ta, ma
-    add a7, t4, a4
-    vle8.v v8, (a4)
-    vle8.v v16, (a3)
-    vmsgt.vx v8, v8, a6
-    vmsgt.vx v16, v16, a6
-    vcpop.m a3, v8
-    vcpop.m t1, v16
-    add a3, a3, t1
-    vle8.v v8, (a7)
-    add a4, t5, a4
-    vmsgt.vx v8, v8, a6
-    vcpop.m a7, v8
-    add a3, a3, a7
-    vle8.v v8, (a4)
-    mv a4, a2
-    vmsgt.vx v8, v8, a6
-    add a2, a2, t3
-    vcpop.m a7, v8
-    add a3, a3, a7
-    add a0, a0, a3
-    bgeu a1, a2, 1b
-2:
-    sub a3, a1, a4
-    beq a1, a4, 4f
-    li a2, 0
-    li a1, -65
-3:
-    vsetvli a5, a3, e8, MX(), ta, ma
-    sub a3, a3, a5
-    vle8.v v8, (a4)
-    add a4, a4, a5
-    vmsgt.vx v8, v8, a1
-    vcpop.m a5, v8
-    add a2, a2, a5
-    bne a3, zero, 3b
-    add a0, a0, a2
-4:
-    ret
-5:
-    li a0, 0
-    j 2b
-
-
-
-
-#endif
-
-
-
-
diff --git a/tests/rvv_bench/utf8_count/utf8_count.c b/tests/rvv_bench/utf8_count/utf8_count.c
deleted file mode 100644
index ebe2e678c..000000000
--- a/tests/rvv_bench/utf8_count/utf8_count.c
+++ /dev/null
@@ -1,135 +0,0 @@
-#include "bench.h"
-
-size_t
-utf8_count_scalar(char const *str, size_t len)
-{
-    uint8_t const *p = (uint8_t const*)str;
-    size_t count = 0;
-    while (len--) count += (*p++ & 0xc0) != 0x80, BENCH_CLOBBER();
-    return count;
-}
-
-size_t
-utf8_count_scalar_autovec(char const *str, size_t len)
-{
-    uint8_t const *p = (uint8_t const*)str;
-    size_t count = 0;
-    while (len--) count += (*p++ & 0xc0) != 0x80;
-    return count;
-}
-
-#define GEN_SWAR(name, popc, clobber) \
-    size_t \
-    utf8_count_##name(char const *str, size_t len) \
-    { \
-        ux const BENCH_MAY_ALIAS *u; \
-        size_t count = 0, tail = 0; \
-\
-        uint8_t const *u8 = (uint8_t const*)str; \
-        if (len < sizeof *u) { \
-            tail = len; \
-            goto skip; \
-        } \
-\
-        tail = sizeof *u - (uintptr_t)str % sizeof *u; \
-\
-        len -= tail; \
-        while (tail--) \
-            count += (*u8++ & 0xC0) != 0x80, clobber; \
-\
-        u = (ux const*)u8; \
-        tail = len % sizeof *u; \
-\
-        for (len /= sizeof *u; len--; ++u) { \
-            ux b1 = ~*u & (ux)0x8080808080808080; \
-            ux b2 = *u & (ux)0x4040404040404040; \
-            count += popc((b1 >> 1) | b2); \
-            clobber; \
-        } \
-\
-        u8 = (uint8_t const*)u; \
-    skip: \
-        while (tail--) \
-            count += (*u8++ & 0xC0) != 0x80, clobber; \
-        return count; \
-    }
-
-#if __riscv_zbb
-GEN_SWAR(SWAR_popc,__builtin_popcountll,BENCH_CLOBBER())
-GEN_SWAR(SWAR_popc_autovec,__builtin_popcountll,(void)0)
-# define POPC(f) f(SWAR_popc) f(SWAR_popc_autovec)
-#else
-# define POPC(f)
-#endif
-
-static inline int
-upopcnt(ux x)
-{
-    /* 2-bit sums */
-    x -= (x >> 1) & (-(ux)1/3);
-    /* 4-bit sums */
-    x = (x & (-(ux)1/15*3)) + ((x >> 2) & (-(ux)1/15*3));
-    /* 8-bit sums */
-    x = (x + (x >> 4)) & (-(ux)1/255*15);
-    BENCH_VOLATILE_REG(x);
-    /* now we can just add the sums together, because can't overflow,
-     * since there can't be more than 255 bits set */
-    x += (x >> 8); /* 16-bit sums */
-    x += (x >> 16); /* sum 16-bit sums */
-    IF64(x += (x >> 32)); /* sum 32-bit sums */
-    return x & 127;
-}
-
-
-GEN_SWAR(SWAR_popc_bithack,upopcnt,BENCH_CLOBBER())
-GEN_SWAR(SWAR_popc_bithack_autovec,upopcnt,(void)0)
-
-
-#define IMPLS(f) \
-    MX(f, rvv) \
-    f(scalar) \
-    f(scalar_autovec) \
-    POPC(f) \
-    f(SWAR_popc_bithack) \
-    f(SWAR_popc_bithack_autovec) \
-    MX(f, rvv_align) \
-    MX(f, rvv_tail) \
-    MX(f, rvv_128) \
-    MX(f, rvv_4x) \
-    MX(f, rvv_4x_tail) \
-
-typedef size_t Func(char const *str, size_t len);
-
-#define DECLARE(f) extern Func utf8_count_##f;
-IMPLS(DECLARE)
-
-#define EXTRACT(f) { #f, &utf8_count_##f },
-Impl impls[] = { IMPLS(EXTRACT) };
-
-char *str;
-ux last;
-
-void init(void) { }
-ux checksum(size_t n) { return last; }
-
-void common(size_t n, size_t off) {
-    str = (char*)mem + off;
-    memrand(str, n + 9);
-}
-
-BENCH(base) {
-    common(n, urand() & 511);
-    TIME last = (uintptr_t)f(str, n);
-} BENCH_END
-
-BENCH(aligned) {
-    common(n, 0);
-    TIME last = (uintptr_t)f(str, n);
-} BENCH_END
-
-Bench benches[] = {
-    { MAX_MEM - 521, "utf8 count", bench_base },
-    { MAX_MEM - 521, "utf8 count aligned", bench_aligned }
-}; BENCH_MAIN(impls, benches)
-
-
diff --git a/tests/t1_main.S b/tests/t1_main.S
index 85ed6ac32..426e90a1b 100644
--- a/tests/t1_main.S
+++ b/tests/t1_main.S
@@ -10,6 +10,8 @@ _start:
     call test
 
     // exit
-    csrwi 0x7cc, 0
+    li x1, 0x40000000
+    li x2, 0xdeadbeef
+    sw x2, 0(x1)
 
 .p2align 2
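Note on the final hunk: the exit sequence in tests/t1_main.S changes from a csrwi write to CSR 0x7cc into a 32-bit store of the magic word 0xdeadbeef to address 0x40000000. The address and value come straight from the hunk above; that the T1 emulator watches this address as an "exit" MMIO register and terminates simulation on the store is an assumption, not something this diff states. The sketch below is a hypothetical C equivalent for illustration only; t1_exit, T1_EXIT_ADDR, and T1_EXIT_MAGIC are names introduced here and do not appear in the patched sources.

#include <stdint.h>

/* Values copied from the t1_main.S hunk above. */
#define T1_EXIT_ADDR  0x40000000u
#define T1_EXIT_MAGIC 0xdeadbeefu

/* Hypothetical helper mirroring "li x1, ...; li x2, ...; sw x2, 0(x1)". */
static inline void t1_exit(void)
{
    volatile uint32_t *exit_reg = (volatile uint32_t *)(uintptr_t)T1_EXIT_ADDR;
    *exit_reg = T1_EXIT_MAGIC; /* assumed: the emulator ends the run when it sees this store */
    for (;;)
        ; /* defensive spin, in case the store does not halt the hart immediately */
}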