[#1548][vector crypto] adding register index LMUL alignment checks #1815

Open
wants to merge 8 commits into base: master
4 changes: 4 additions & 0 deletions riscv/insns/vghsh_vv.h
@@ -6,6 +6,10 @@ require_zvkg;
require(P.VU.vsew == 32);
require_egw_fits(128);

require_vd_align_lmul;
require_vs2_align_lmul;
require_vs1_align_lmul;

VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
{},
{
3 changes: 3 additions & 0 deletions riscv/insns/vgmul_vv.h
@@ -6,6 +6,9 @@ require_zvkg;
require(P.VU.vsew == 32);
require_egw_fits(128);

require_vd_align_lmul;
require_vs2_align_lmul;

VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
{},
{
1 change: 0 additions & 1 deletion riscv/insns/vsha2ch_vv.h
@@ -2,7 +2,6 @@

#include "zvknh_ext_macros.h"

// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
require_vsha2_common_constraints;

switch (P.VU.vsew) {
1 change: 0 additions & 1 deletion riscv/insns/vsha2cl_vv.h
@@ -2,7 +2,6 @@

#include "zvknh_ext_macros.h"

// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
require_vsha2_common_constraints;

switch (P.VU.vsew) {
1 change: 0 additions & 1 deletion riscv/insns/vsha2ms_vv.h
@@ -2,7 +2,6 @@

#include "zvknh_ext_macros.h"

// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
require_vsha2_common_constraints;

switch (P.VU.vsew) {
1 change: 1 addition & 0 deletions riscv/insns/vsm3me_vv.h
@@ -13,6 +13,7 @@
(ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6))

require_vsm3_constraints;
require_vs1_align_lmul;

VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP(
{},
3 changes: 3 additions & 0 deletions riscv/insns/vsm4k_vi.h
@@ -16,6 +16,9 @@ static constexpr uint32_t zvksed_ck[32] = {

require_vsm4_constraints;

require_vd_align_lmul;
require_vs2_align_lmul;

VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP(
{},
// The following statements will be executed before the first execution
3 changes: 3 additions & 0 deletions riscv/insns/vsm4r_vs.h
@@ -5,6 +5,9 @@
require_vsm4_constraints;
// No overlap of vd and vs2.
require(insn.rd() != insn.rs2());
// vd is LMUL aligned; vs2 is ceil(EGW / VLEN) aligned.
require_vd_align_lmul;
require_vs2_align_eglmul(128);

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
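For example, with VLEN=64 the single 128-bit element group read through vs2 spans two registers (EGW / VLEN = 2), so vs2 must be an even register index; with VLEN of 128 or more the ratio is at most one and only the LMUL alignment of vd is enforced.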
3 changes: 3 additions & 0 deletions riscv/insns/vsm4r_vv.h
@@ -4,6 +4,9 @@

require_vsm4_constraints;

require_vd_align_lmul;
require_vs2_align_lmul;

VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
{},
{
23 changes: 23 additions & 0 deletions riscv/zvk_ext_macros.h
@@ -86,6 +86,29 @@
// EGW <= (LMUL * VLEN)
#define require_egw_fits(EGW) require((EGW) <= (P.VU.VLEN * P.VU.vflmul))

// Ensures that a register index is LMUL aligned.
// No check is needed for unit or fractional LMUL.
#define require_vreg_align_lmul(VREG_NUM) \
  do { \
    if (P.VU.vflmul > 1) { \
      require_align(VREG_NUM, P.VU.vflmul); \
    } \
  } while (0)

// Ensures that a register index is aligned to EMUL,
// evaluated as EGW / VLEN. The check is only enabled when
// that ratio is greater than one; no index alignment check
// is required for fractional EMUL.
#define require_vreg_align_eglmul(EGW, VREG_NUM) \
  do { \
    float vfeglmul = (float)(EGW) / P.VU.VLEN; \
    if (vfeglmul > 1) { \
      require_align(VREG_NUM, vfeglmul); \
    } \
  } while (0)

#define require_vd_align_lmul require_vreg_align_lmul(insn.rd())
#define require_vs2_align_lmul require_vreg_align_lmul(insn.rs2())
#define require_vs1_align_lmul require_vreg_align_lmul(insn.rs1())
#define require_vs2_align_eglmul(EGW) require_vreg_align_eglmul(EGW, insn.rs2())

// Checks that the vector unit state (vtype and vl) can be interpreted
// as element groups with EEW=32, EGS=4 (four 32-bits elements per group),
// for an effective element group width of EGW=128 bits.
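To make the two checks concrete, here is a minimal standalone model of their behavior (a sketch only: the function names and integer/float types are illustrative rather than Spike's API, and require_align is modeled as plain divisibility of the register index):

#include <cassert>

// Model of require_vreg_align_lmul: a register group with an
// effective multiplier m > 1 must start at an index divisible by m.
static bool aligned_lmul(unsigned vreg, float vflmul) {
  if (vflmul > 1)
    return vreg % static_cast<unsigned>(vflmul) == 0;
  return true;  // unit or fractional LMUL: no constraint
}

// Model of require_vreg_align_eglmul: same rule, with the
// multiplier computed as EGW / VLEN.
static bool aligned_eglmul(unsigned egw, unsigned vlen, unsigned vreg) {
  // Cast before dividing so that EGW=128, VLEN=256 yields 0.5
  // rather than the truncated integer 0.
  float vfeglmul = static_cast<float>(egw) / vlen;
  if (vfeglmul > 1)
    return vreg % static_cast<unsigned>(vfeglmul) == 0;
  return true;
}

int main() {
  assert(aligned_lmul(4, 2.0f));        // v4 is valid at LMUL=2
  assert(!aligned_lmul(3, 2.0f));       // v3 is not
  assert(aligned_eglmul(128, 64, 2));   // EGW/VLEN = 2: v2 is valid
  assert(!aligned_eglmul(128, 64, 3));  // v3 is not
  assert(aligned_eglmul(128, 256, 5));  // ratio 0.5: any index passes
  return 0;
}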
10 changes: 10 additions & 0 deletions riscv/zvkned_ext_macros.h
@@ -10,6 +10,8 @@
// - Zvkned is enabled
// - EGW (128) <= LMUL * VLEN
// - vd and vs2 cannot overlap
// - vd is LMUL aligned
// - vs2 is ceil(EGW / VLEN) aligned
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
@@ -19,12 +21,16 @@
require(P.VU.vsew == 32); \
require_egw_fits(128); \
require(insn.rd() != insn.rs2()); \
require_vd_align_lmul; \
require_vs2_align_eglmul(128); \
} while (false)

// vaes*.vv instruction constraints. Those are the same as the .vs ones,
// except that the .vv variants have no overlap constraint and align
// vs2 to LMUL rather than to ceil(EGW / VLEN).
// - Zvkned is enabled
// - EGW (128) <= LMUL * VLEN
// - vd is LMUL aligned
// - vs2 is LMUL aligned
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
@@ -33,6 +39,8 @@
require_zvkned; \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
} while (false)

// vaeskf*.vi instruction constraints. Those are the same as the .vv ones.
@@ -41,6 +49,8 @@
require_zvkned; \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
} while (false)

#define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0))
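As a concrete case, with VLEN=128 and LMUL=4 the .vs constraints require vd to be a multiple of 4, while vs2, which holds a single 128-bit element group (EGW / VLEN = 1), may be any register other than vd; the .vv constraints instead require both vd and vs2 to be multiples of 4.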
6 changes: 6 additions & 0 deletions riscv/zvknh_ext_macros.h
@@ -9,6 +9,9 @@
// Constraints common to all vsha* instructions, across all VSEW:
// - VSEW is 32 (SHA-256) or 64 (SHA-512)
// - No overlap of vd with vs1 or vs2.
// - vd is LMUL aligned
// - vs1 is LMUL aligned
// - vs2 is LMUL aligned
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_..._EGU32x4_..._LOOP and VI_..._EGU64x4_..._LOOP
@@ -18,6 +21,9 @@
require(P.VU.vsew == 32 || P.VU.vsew == 64); \
require(insn.rd() != insn.rs1()); \
require(insn.rd() != insn.rs2()); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
require_vs1_align_lmul; \
} while (false)

// Constraints on vsha2 instructions that must be verified when VSEW==32.
4 changes: 4 additions & 0 deletions riscv/zvksh_ext_macros.h
@@ -11,6 +11,8 @@
// - VSEW == 32
// - EGW (256) <= LMUL * VLEN
// - No overlap of vd and vs2.
// - vd is LMUL aligned
// - vs2 is LMUL aligned
//
// The constraint that vstart and vl are both EGS (8) aligned
// is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros.
@@ -20,6 +22,8 @@
require(P.VU.vsew == 32); \
require_egw_fits(256); \
require(insn.rd() != insn.rs2()); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
} while (false)

#define FF1(X, Y, Z) ((X) ^ (Y) ^ (Z))
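Note that EGW is 256 here, so require_egw_fits(256) already forces VLEN * LMUL >= 256: with VLEN=128 the minimum legal configuration is LMUL=2, at which point the new checks require vd and vs2 to be even register indices.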