From 11ceb5158236ba2bf178e48da4395eb9c1fb245f Mon Sep 17 00:00:00 2001 From: Vincenzo Maisto Date: Wed, 18 Oct 2023 14:24:25 +0200 Subject: [PATCH] Extensions and bug fixes * Stall CSR operations if there is a pending vector instruction * Set vstart=0 for succesful vector instructions * Extend and fix Ara exception reporting from VLSU * Add MMU interface (just mock) * Refactoring --- hardware/include/ara_pkg.sv | 12 +- hardware/src/ara.sv | 43 +- hardware/src/ara_dispatcher.sv | 1225 +++++++++++++++++--------------- hardware/src/ara_sequencer.sv | 8 +- hardware/src/vlsu/addrgen.sv | 300 ++++---- hardware/src/vlsu/vldu.sv | 212 +++--- hardware/src/vlsu/vlsu.sv | 54 +- hardware/src/vlsu/vstu.sv | 1 + 8 files changed, 1035 insertions(+), 820 deletions(-) diff --git a/hardware/include/ara_pkg.sv b/hardware/include/ara_pkg.sv index 1f8e50cfa..e3d0c8753 100644 --- a/hardware/include/ara_pkg.sv +++ b/hardware/include/ara_pkg.sv @@ -155,17 +155,17 @@ package ara_pkg; } ara_op_e; // Return true if op is a load operation - function automatic is_load(ara_op_e op); + function automatic logic is_load(ara_op_e op); is_load = op inside {[VLE:VLXE]}; endfunction : is_load // Return true if op is a store operation - function automatic is_store(ara_op_e op); + function automatic logic is_store(ara_op_e op); is_store = op inside {[VSE:VSXE]}; endfunction : is_store // Return true of op is either VCPOP or VFIRST - function automatic vd_scalar(ara_op_e op); + function automatic logic vd_scalar(ara_op_e op); vd_scalar = op inside {[VCPOP:VFIRST]}; endfunction : vd_scalar @@ -322,11 +322,11 @@ package ara_pkg; // Scalar response elen_t resp; - // Instruction triggered an error - logic error; + // Instruction triggered an exception + ariane_pkg::exception_t exception; // New value for vstart - vlen_t error_vl; + vlen_t exception_vstart; } ara_resp_t; //////////////////// diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv index 0583d1eea..b9e70b3e6 100644 --- a/hardware/src/ara.sv +++ 
b/hardware/src/ara.sv @@ -37,6 +37,25 @@ module ara import ara_pkg::*; #( input logic scan_enable_i, input logic scan_data_i, output logic scan_data_o, + + // CSR input + input logic en_ld_st_translation_i, + + // Interface with CVA6's sv39 MMU + // This is everything the MMU can provide, it might be overcomplete for Ara and some signals be useless + output ariane_pkg::exception_t mmu_misaligned_ex_o, + output logic mmu_req_o, // request address translation + output logic [riscv::VLEN-1:0] mmu_vaddr_o, // virtual address out + output logic mmu_is_store_o, // the translation is requested by a store + // if we need to walk the page table we can't grant in the same cycle + // Cycle 0 + input logic mmu_dtlb_hit_i, // sent in the same cycle as the request if translation hits in the DTLB + input logic [riscv::PPNW-1:0] mmu_dtlb_ppn_i, // ppn (send same cycle as hit) + // Cycle 1 + input logic mmu_valid_i, // translation is valid + input logic [riscv::PLEN-1:0] mmu_paddr_i, // translated address + input ariane_pkg::exception_t mmu_exception_i, // address translation threw an exception + // Interface with Ariane input accelerator_req_t acc_req_i, output accelerator_resp_t acc_resp_o, @@ -123,8 +142,8 @@ module ara import ara_pkg::*; #( pe_resp_t [NrPEs-1:0] pe_resp; // Interface with the address generator logic addrgen_ack; - logic addrgen_error; - vlen_t addrgen_error_vl; + ariane_pkg::exception_t addrgen_exception; + vlen_t addrgen_exception_vstart; logic [NrLanes-1:0] alu_vinsn_done; logic [NrLanes-1:0] mfpu_vinsn_done; // Interface with the operand requesters @@ -171,8 +190,8 @@ module ara import ara_pkg::*; #( .pe_scalar_resp_ready_o(pe_scalar_resp_ready ), // Interface with the address generator .addrgen_ack_i (addrgen_ack ), - .addrgen_error_i (addrgen_error ), - .addrgen_error_vl_i (addrgen_error_vl ) + .addrgen_exception_i (addrgen_exception ), + .addrgen_exception_vstart_i(addrgen_exception_vstart ) ); // Scalar move support @@ -337,8 +356,8 @@ module ara import 
ara_pkg::*; #( .pe_req_ready_o (pe_req_ready[NrLanes+OffsetStore : NrLanes+OffsetLoad]), .pe_resp_o (pe_resp[NrLanes+OffsetStore : NrLanes+OffsetLoad] ), .addrgen_ack_o (addrgen_ack ), - .addrgen_error_o (addrgen_error ), - .addrgen_error_vl_o (addrgen_error_vl ), + .addrgen_exception_o (addrgen_exception ), + .addrgen_exception_vstart_o (addrgen_exception_vstart ), // Interface with the Mask unit .mask_i (mask ), .mask_valid_i (mask_valid ), @@ -354,6 +373,18 @@ module ara import ara_pkg::*; #( .addrgen_operand_target_fu_i(sldu_addrgen_operand_target_fu ), .addrgen_operand_valid_i (sldu_addrgen_operand_valid ), .addrgen_operand_ready_o (addrgen_operand_ready ), + // CSR input + .en_ld_st_translation_i, + // Interface with CVA6's sv39 MMU + .mmu_misaligned_ex_o , + .mmu_req_o , + .mmu_vaddr_o , + .mmu_is_store_o , + .mmu_dtlb_hit_i , + .mmu_dtlb_ppn_i , + .mmu_valid_i , + .mmu_paddr_i , + .mmu_exception_i , // Load unit .ldu_result_req_o (ldu_result_req ), .ldu_result_addr_o (ldu_result_addr ), diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index 8471fb391..d9d803e71 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -53,17 +53,17 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // CSRs // //////////// - vlen_t vstart_d, vstart_q; - vlen_t vl_d, vl_q; - vtype_t vtype_d, vtype_q; - vxsat_e vxsat_d, vxsat_q; - vxrm_t vxrm_d, vxrm_q; - - `FF(vstart_q, vstart_d, '0) - `FF(vl_q, vl_d, '0) - `FF(vtype_q, vtype_d, '{vill: 1'b1, default: '0}) - `FF(vxsat_q, vxsat_d, '0) - `FF(vxrm_q, vxrm_d, '0) + vlen_t csr_vstart_d, csr_vstart_q; + vlen_t csr_vl_d, csr_vl_q; + vtype_t csr_vtype_d, csr_vtype_q; + vxsat_e csr_vxsat_d, csr_vxsat_q; + vxrm_t csr_vxrm_d, csr_vxrm_q; + + `FF(csr_vstart_q, csr_vstart_d, '0) + `FF(csr_vl_q, csr_vl_d, '0) + `FF(csr_vtype_q, csr_vtype_d, '{vill: 1'b1, default: '0}) + `FF(csr_vxsat_q, csr_vxsat_d, '0) + `FF(csr_vxrm_q, csr_vxrm_d, '0) // Converts between the internal 
representation of `vtype_t` and the full XLEN-bit CSR. function automatic riscv::xlen_t xlen_vtype(vtype_t vtype); xlen_vtype = {vtype.vill, {riscv::XLEN-9{1'b0}}, vtype.vma, vtype.vta, vtype.vsew, @@ -134,7 +134,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( NORMAL_OPERATION, WAIT_IDLE, RESHUFFLE, - SLDU_SEQUENCER + SLDU_SEQUENCER // NOTE: this is never used! } state_e; state_e state_d, state_q; @@ -193,9 +193,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // its counters of pending memory operations // Ara should tell Ariane when a memory operation is completed, so that it can modify // its pending load/store counters. - // A memory operation can be completed both when it is over and when vl_q == 0. In the latter case, + // A memory operation can be completed both when it is over and when csr_vl_q == 0. In the latter case, // Ara's decoder answers immediately, and this can cause a collision with an answer from Ara's VLSU. - // To avoid collisions, we give precedence to the VLSU, and we delay the vl_q == 0 memory op + // To avoid collisions, we give precedence to the VLSU, and we delay the csr_vl_q == 0 memory op // completion signal if a collision occurs logic load_zero_vl, store_zero_vl; // Do not checks vregs validity against current LMUL @@ -205,14 +205,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( logic is_decoding; // Is this an in-lane operation? 
logic in_lane_op; - // If the vslideup offset is greater than vl_q, the vslideup has no effects + // If the vslideup offset is greater than csr_vl_q, the vslideup has no effects logic null_vslideup; // Pipeline the VLSU's load and store complete signals, for timing reasons logic load_complete_q; logic store_complete_q; - `FF(load_complete_q, load_complete_i, 1'b0) - `FF(store_complete_q, store_complete_i, 1'b0) + logic illegal_insn_load, illegal_insn_store; + `FF(load_complete_q, load_complete_i || illegal_insn_load, 1'b0) + `FF(store_complete_q, store_complete_i || illegal_insn_store, 1'b0) // NP2 Slide support logic is_stride_np2; @@ -236,14 +237,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( always_comb begin: p_decoder // Default values - vstart_d = vstart_q; - vl_d = vl_q; - vtype_d = vtype_q; + csr_vstart_d = csr_vstart_q; + csr_vl_d = csr_vl_q; + csr_vtype_d = csr_vtype_q; state_d = state_q; eew_d = eew_q; eew_valid_d = eew_valid_q; - lmul_vs2 = vtype_q.vlmul; - lmul_vs1 = vtype_q.vlmul; + lmul_vs2 = csr_vtype_q.vlmul; + lmul_vs1 = csr_vtype_q.vlmul; reshuffle_req_d = reshuffle_req_q; eew_old_buffer_d = eew_old_buffer_q; @@ -255,8 +256,10 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( rs_mask_request_d = 1'b0; illegal_insn = 1'b0; - vxsat_d = vxsat_q; - vxrm_d = vxrm_q; + illegal_insn_load = 1'b0; + illegal_insn_store = 1'b0; + csr_vxsat_d = csr_vxsat_q; + csr_vxrm_d = csr_vxrm_q; is_vload = 1'b0; is_vstore = 1'b0; @@ -271,8 +274,6 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( is_decoding = 1'b0; in_lane_op = 1'b0; - acc_resp_o.req_ready = 1'b0; - acc_resp_o.resp_valid = 1'b0; acc_resp_o = '{ trans_id : acc_req_i.trans_id, load_complete : load_zero_vl | load_complete_q, @@ -281,18 +282,20 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( fflags_valid : |fflags_ex_valid_i, default : '0 }; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; // fflags for (int lane = 0; lane 
< NrLanes; lane++) acc_resp_o.fflags |= fflags_ex_i[lane]; ara_req_d = '{ - vl : vl_q, - vstart : vstart_q, - vtype : vtype_q, - emul : vtype_q.vlmul, - eew_vs1 : vtype_q.vsew, - eew_vs2 : vtype_q.vsew, - eew_vd_op : vtype_q.vsew, + vl : csr_vl_q, + vstart : csr_vstart_q, + vtype : csr_vtype_q, + emul : csr_vtype_q.vlmul, + eew_vs1 : csr_vtype_q.vsew, + eew_vs2 : csr_vtype_q.vsew, + eew_vd_op : csr_vtype_q.vsew, eew_vmask : eew_q[VMASK], cvt_resize : CVT_SAME, default : '0 @@ -303,9 +306,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ignore_zero_vl_check = 1'b0; // Saturation in any lane will raise vxsat flag - vxsat_d |= |vxsat_flag_i; + csr_vxsat_d |= |vxsat_flag_i; // Fixed-point rounding mode is applied to all lanes - for (int lane = 0; lane < NrLanes; lane++) alu_vxrm_o[lane] = vxrm_q; + for (int lane = 0; lane < NrLanes; lane++) alu_vxrm_o[lane] = csr_vxrm_q; // Rounding mode is shared between all lanes for (int lane = 0; lane < NrLanes; lane++) acc_resp_o.fflags |= fflags_ex_i[lane]; // Special states @@ -420,14 +423,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end end end - endcase + endcase // state_q - if (state_d == NORMAL_OPERATION && state_q != RESHUFFLE) begin - if (acc_req_i.req_valid && ara_req_ready_i && acc_req_i.resp_ready) begin + if (state_d == NORMAL_OPERATION && state_q != RESHUFFLE) begin : not_reshuffling + if (acc_req_i.req_valid && ara_req_ready_i && acc_req_i.resp_ready) begin : ready // Decoding is_decoding = 1'b1; // Acknowledge the request - acc_resp_o.req_ready = ara_req_ready_i; + acc_resp_o.req_ready = 1'b1; // Decode the instructions based on their opcode unique case (acc_req_i.insn.itype.opcode) @@ -435,11 +438,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Vector Arithmetic instructions // ////////////////////////////////////// - riscv::OpcodeVec: begin + riscv::OpcodeVec: begin : OpcodeVec // Instruction is of one of the RVV types automatic rvv_instruction_t 
insn = rvv_instruction_t'(acc_req_i.insn.instr); - // These always respond at the same cycle + // These (mostly) always respond at the same cycle acc_resp_o.resp_valid = 1'b1; // Decode based on their func3 field @@ -447,33 +450,34 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Configuration instructions OPCFG: begin: opcfg // These can be acknowledged regardless of the state of Ara - acc_resp_o.req_ready = 1'b1; + // NOTE: unless there is a pending fault-only first vector load + // acc_resp_o.req_ready = 1'b1; is_config = 1'b1; // Update vtype if (insn.vsetvli_type.func1 == 1'b0) begin // vsetvli - vtype_d = vtype_xlen(riscv::xlen_t'(insn.vsetvli_type.zimm11)); + csr_vtype_d = vtype_xlen(riscv::xlen_t'(insn.vsetvli_type.zimm11)); end else if (insn.vsetivli_type.func2 == 2'b11) begin // vsetivli - vtype_d = vtype_xlen(riscv::xlen_t'(insn.vsetivli_type.zimm10)); + csr_vtype_d = vtype_xlen(riscv::xlen_t'(insn.vsetivli_type.zimm10)); end else if (insn.vsetvl_type.func7 == 7'b100_0000) begin // vsetvl - vtype_d = vtype_xlen(riscv::xlen_t'(acc_req_i.rs2[7:0])); + csr_vtype_d = vtype_xlen(riscv::xlen_t'(acc_req_i.rs2[7:0])); end else - acc_resp_o.error = 1'b1; + illegal_insn = 1'b1; // Check whether the updated vtype makes sense - if ((vtype_d.vsew > rvv_pkg::vew_e'($clog2(ELENB))) || // SEW <= ELEN - (vtype_d.vlmul == LMUL_RSVD) || // reserved value + if ((csr_vtype_d.vsew > rvv_pkg::vew_e'($clog2(ELENB))) || // SEW <= ELEN + (csr_vtype_d.vlmul == LMUL_RSVD) || // reserved value // LMUL >= SEW/ELEN - (signed'($clog2(ELENB)) + signed'(vtype_d.vlmul) < signed'(vtype_d.vsew))) begin - vtype_d = '{vill: 1'b1, default: '0}; - vl_d = '0; + (signed'($clog2(ELENB)) + signed'(csr_vtype_d.vlmul) < signed'(csr_vtype_d.vsew))) begin + csr_vtype_d = '{vill: 1'b1, default: '0}; + csr_vl_d = '0; end // Update the vector length else begin // Maximum vector length. VLMAX = LMUL * VLEN / SEW. 
- automatic int unsigned vlmax = VLENB >> vtype_d.vsew; - unique case (vtype_d.vlmul) + automatic int unsigned vlmax = VLENB >> csr_vtype_d.vsew; + unique case (csr_vtype_d.vlmul) LMUL_1 : vlmax <<= 0; LMUL_2 : vlmax <<= 1; LMUL_4 : vlmax <<= 2; @@ -486,24 +490,24 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase if (insn.vsetivli_type.func2 == 2'b11) begin // vsetivli - vl_d = vlen_t'(insn.vsetivli_type.uimm5); + csr_vl_d = vlen_t'(insn.vsetivli_type.uimm5); end else begin // vsetvl || vsetvli if (insn.vsetvl_type.rs1 == '0 && insn.vsetvl_type.rd == '0) begin // Do not update the vector length - vl_d = vl_q; + csr_vl_d = csr_vl_q; end else if (insn.vsetvl_type.rs1 == '0 && insn.vsetvl_type.rd != '0) begin // Set the vector length to vlmax - vl_d = vlmax; + csr_vl_d = vlmax; end else begin // Normal stripmining - vl_d = ((|acc_req_i.rs1[$bits(acc_req_i.rs1)-1:$bits(vl_d)]) || + csr_vl_d = ((|acc_req_i.rs1[$bits(acc_req_i.rs1)-1:$bits(csr_vl_d)]) || (vlen_t'(acc_req_i.rs1) > vlmax)) ? vlmax : vlen_t'(acc_req_i.rs1); end end end // Return the new vl - acc_resp_o.result = vl_d; + acc_resp_o.result = csr_vl_d; // If the vtype has changed, wait for the backend before issuing any new instructions. // This is to avoid hazards on implicit register labels when LMUL_old > LMUL_new @@ -511,7 +515,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Checking only lmul_q is a trick: we want to stall only if both lmuls have // zero MSB. 
If lmul_q has zero MSB, it's greater than lmul_d only if also // lmul_d has zero MSB since the slice comparison is intrinsically unsigned - if (!vtype_q.vlmul[2] && (vtype_d.vlmul[2:0] < vtype_q.vlmul[2:0])) + if (!csr_vtype_q.vlmul[2] && (csr_vtype_d.vlmul[2:0] < csr_vtype_q.vlmul[2:0])) state_d = WAIT_IDLE; end @@ -631,7 +635,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.vm) begin ara_req_d.eew_vs1 = eew_q[ara_req_d.vs1]; ara_req_d.vtype.vsew = eew_q[ara_req_d.vs1]; - ara_req_d.vl = (vl_q << vtype_q.vsew[1:0]) >> ara_req_d.eew_vs1[1:0]; + ara_req_d.vl = (csr_vl_q << csr_vtype_q.vsew[1:0]) >> ara_req_d.eew_vs1[1:0]; end end 6'b100000: ara_req_d.op = ara_pkg::VSADDU; @@ -647,11 +651,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b101100: begin ara_req_d.op = ara_pkg::VNSRL; ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - lmul_vs2 = next_lmul(vtype_q.vlmul); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. - if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 unique case (ara_req_d.emul.next()) @@ -665,11 +669,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b101101: begin ara_req_d.op = ara_pkg::VNSRA; ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - lmul_vs2 = next_lmul(vtype_q.vlmul); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. 
- if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 unique case (ara_req_d.emul.next()) @@ -682,28 +686,28 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end 6'b101110: begin ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 6'b101111: begin ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end // Reductions encode in cvt_resize the neutral value bits // CVT_WIDE is 2'b00 (hack to save wires) 6'b110000: begin ara_req_d.op = ara_pkg::VWREDSUMU; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.eew_vs1 = vtype_q.vsew.next(); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueReductionZExt; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110001: begin ara_req_d.op = ara_pkg::VWREDSUM; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.eew_vs1 = vtype_q.vsew.next(); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueReductionZExt; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; @@ -727,7 +731,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Instruction is invalid if the vtype is invalid - if (vtype_q.vill) illegal_insn = 1'b1; + if (csr_vtype_q.vill) illegal_insn = 1'b1; end OPIVX: begin: opivx @@ -757,7 +761,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001110: begin ara_req_d.op = ara_pkg::VSLIDEUP; 
ara_req_d.stride = acc_req_i.rs1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Encode vslideup/vslide1up on the use_scalar_op field ara_req_d.use_scalar_op = 1'b0; // Vl refers to current system vsew, but operand requesters @@ -765,13 +769,13 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // i.e., request will need reshuffling ara_req_d.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] || - (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; end 6'b001111: begin ara_req_d.op = ara_pkg::VSLIDEDOWN; ara_req_d.stride = acc_req_i.rs1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Encode vslidedown/vslide1down on the use_scalar_op field ara_req_d.use_scalar_op = 1'b0; // Request will need reshuffling @@ -880,11 +884,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b101100: begin ara_req_d.op = ara_pkg::VNSRL; ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - lmul_vs2 = next_lmul(vtype_q.vlmul); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. 
- if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 unique case (ara_req_d.emul.next()) @@ -898,11 +902,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b101101: begin ara_req_d.op = ara_pkg::VNSRA; ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - lmul_vs2 = next_lmul(vtype_q.vlmul); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. - if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 unique case (ara_req_d.emul.next()) @@ -915,11 +919,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end 6'b101110: begin ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 6'b101111: begin ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase @@ -937,7 +941,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Instruction is invalid if the vtype is invalid - if (vtype_q.vill) illegal_insn = 1'b1; + if (csr_vtype_q.vill) illegal_insn = 1'b1; end OPIVI: begin: opivi @@ -965,19 +969,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001110: begin ara_req_d.op = ara_pkg::VSLIDEUP; ara_req_d.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Encode vslideup/vslide1up on the use_scalar_op field ara_req_d.use_scalar_op = 1'b0; // Request will need reshuffling ara_req_d.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] 
|| - (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; end 6'b001111: begin ara_req_d.op = ara_pkg::VSLIDEDOWN; ara_req_d.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Encode vslidedown/vslide1down on the use_scalar_op field ara_req_d.use_scalar_op = 1'b0; // Request will need reshuffling @@ -1090,11 +1094,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b101100: begin ara_req_d.op = ara_pkg::VNSRL; ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - lmul_vs2 = next_lmul(vtype_q.vlmul); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. - if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 unique case (ara_req_d.emul.next()) @@ -1108,11 +1112,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b101101: begin ara_req_d.op = ara_pkg::VNSRA; ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - lmul_vs2 = next_lmul(vtype_q.vlmul); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. 
- if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 unique case (ara_req_d.emul.next()) @@ -1125,11 +1129,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end 6'b101110: begin ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 6'b101111: begin ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase @@ -1147,7 +1151,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Instruction is invalid if the vtype is invalid - if (vtype_q.vill) illegal_insn = 1'b1; + if (csr_vtype_q.vill) illegal_insn = 1'b1; end OPMVV: begin: opmvv @@ -1236,7 +1240,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ignore_zero_vl_check = 1'b1; // Sign extend operands - unique case (vtype_q.vsew) + unique case (csr_vtype_q.vsew) EW8: begin ara_req_d.conversion_vs2 = OpQueueConversionSExt8; end @@ -1250,13 +1254,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin - acc_resp_o.req_ready = 1'b1; - acc_resp_o.result = ara_resp_i.resp; - acc_resp_o.error = ara_resp_i.error; - acc_resp_o.resp_valid = 1'b1; - ara_req_valid_d = 1'b0; - end + if ( ara_resp_valid_i ) begin : ara_resp_valid + acc_resp_o.req_ready = 1'b1; + acc_resp_o.resp_valid = 1'b1; + acc_resp_o.result = ara_resp_i.resp; + acc_resp_o.exception = ara_resp_i.exception; + // Clear request to backend + ara_req_valid_d = 1'b0; + end : ara_resp_valid end 6'b010100: begin ara_req_d.use_vd_op = 1'b1; @@ -1356,8 +1361,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req_d.cvt_resize = CVT_WIDE; // Invalid conversion - if (int'(vtype_q.vsew) < 
int'(EW64) || - int'(vtype_q.vlmul) inside {LMUL_1_2, LMUL_1_4, LMUL_1_8}) + if (int'(csr_vtype_q.vsew) < int'(EW64) || + int'(csr_vtype_q.vlmul) inside {LMUL_1_2, LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; end 5'b00011: begin // VSEXT.VF8 @@ -1366,44 +1371,44 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req_d.cvt_resize = CVT_WIDE; // Invalid conversion - if (int'(vtype_q.vsew) < int'(EW64) || - int'(vtype_q.vlmul) inside {LMUL_1_2, LMUL_1_4, LMUL_1_8}) + if (int'(csr_vtype_q.vsew) < int'(EW64) || + int'(csr_vtype_q.vlmul) inside {LMUL_1_2, LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; end 5'b00100: begin // VZEXT.VF4 ara_req_d.conversion_vs2 = OpQueueConversionZExt4; - ara_req_d.eew_vs2 = prev_prev_ew(vtype_q.vsew); + ara_req_d.eew_vs2 = prev_prev_ew(csr_vtype_q.vsew); ara_req_d.cvt_resize = CVT_WIDE; // Invalid conversion - if (int'(vtype_q.vsew) < int'(EW32) || - int'(vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) < int'(EW32) || + int'(csr_vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; end 5'b00101: begin // VSEXT.VF4 ara_req_d.conversion_vs2 = OpQueueConversionSExt4; - ara_req_d.eew_vs2 = prev_prev_ew(vtype_q.vsew); + ara_req_d.eew_vs2 = prev_prev_ew(csr_vtype_q.vsew); ara_req_d.cvt_resize = CVT_WIDE; // Invalid conversion - if (int'(vtype_q.vsew) < int'(EW32) || - int'(vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; + if (int'(csr_vtype_q.vsew) < int'(EW32) || + int'(csr_vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; end 5'b00110: begin // VZEXT.VF2 ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.prev(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.prev(); ara_req_d.cvt_resize = CVT_WIDE; // Invalid conversion - if (int'(vtype_q.vsew) < int'(EW16) || int'(vtype_q.vlmul) inside {LMUL_1_8}) + if (int'(csr_vtype_q.vsew) < int'(EW16) || int'(csr_vtype_q.vlmul) inside {LMUL_1_8}) illegal_insn = 1'b1; end 5'b00111: 
begin // VSEXT.VF2 ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.prev(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.prev(); ara_req_d.cvt_resize = CVT_WIDE; // Invalid conversion - if (int'(vtype_q.vsew) < int'(EW16) || int'(vtype_q.vlmul) inside {LMUL_1_8}) + if (int'(csr_vtype_q.vsew) < int'(EW16) || int'(csr_vtype_q.vlmul) inside {LMUL_1_8}) illegal_insn = 1'b1; end default: illegal_insn = 1'b1; @@ -1443,92 +1448,92 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Widening instructions 6'b110000: begin // VWADDU ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110001: begin // VWADD ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110010: begin // VWSUBU ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110011: begin // VWSUB ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = 
OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110100: begin // VWADDU.W ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b110101: begin // VWADD.W ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b110110: begin // VWSUBU.W ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b110111: begin // VWSUB.W ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = 
OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b111000: begin // VWMULU ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b111010: begin // VWMULSU ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b111011: begin // VWMUL ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; @@ -1536,31 +1541,31 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b111100: begin // VWMACCU ara_req_d.op = ara_pkg::VMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b111101: begin // VWMACC ara_req_d.op = ara_pkg::VMACC; ara_req_d.use_vd_op = 1'b1; - 
ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b111111: begin // VWMACCSU ara_req_d.op = ara_pkg::VMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; @@ -1594,7 +1599,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; // Instruction is invalid if the vtype is invalid - if (vtype_q.vill) illegal_insn = 1'b1; + if (csr_vtype_q.vill) illegal_insn = 1'b1; end OPMVX: begin: opmvx @@ -1619,17 +1624,17 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001110: begin // vslide1up ara_req_d.op = ara_pkg::VSLIDEUP; ara_req_d.stride = 1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling ara_req_d.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] || - (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; end 6'b001111: begin // vslide1down ara_req_d.op = ara_pkg::VSLIDEDOWN; ara_req_d.stride = 1; - 
ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling ara_req_d.scale_vl = 1'b1; end @@ -1637,7 +1642,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // vmv.s.x ara_req_d.op = ara_pkg::VMVSX; ara_req_d.use_vs2 = 1'b0; - ara_req_d.vl = |vl_q ? 1 : '0; + ara_req_d.vl = |csr_vl_q ? 1 : '0; // This instruction ignores LMUL checks skip_lmul_checks = 1'b1; end @@ -1675,92 +1680,92 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Widening instructions 6'b110000: begin // VWADDU ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110001: begin // VWADD ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110010: begin // VWSUBU ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110011: begin // VWSUB ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; 
ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b110100: begin // VWADDU.W ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b110101: begin // VWADD.W ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b110110: begin // VWSUBU.W ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b110111: begin // VWSUB.W ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - 
ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b111000: begin // VWMULU ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b111010: begin // VWMULSU ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; end 6'b111011: begin // VWMUL ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; ara_req_d.cvt_resize = CVT_WIDE; @@ -1768,41 +1773,41 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b111100: begin // VWMACCU ara_req_d.op = ara_pkg::VMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b111101: begin // VWMACC ara_req_d.op = ara_pkg::VMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = 
next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b111110: begin // VWMACCUS ara_req_d.op = ara_pkg::VMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionZExt2; ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end 6'b111111: begin // VWMACCSU ara_req_d.op = ara_pkg::VMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionSExt2; ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; @@ -1830,7 +1835,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; // Instruction is invalid if the vtype is invalid - if (vtype_q.vill) illegal_insn = 1'b1; + if (csr_vtype_q.vill) illegal_insn = 1'b1; end OPFVV: begin: opfvv @@ -1900,7 +1905,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ignore_zero_vl_check = 1'b1; // Zero-extend operands - unique case (vtype_q.vsew) + unique case (csr_vtype_q.vsew) EW16: begin 
ara_req_d.conversion_vs2 = OpQueueConversionZExt4; end @@ -1911,13 +1916,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin - acc_resp_o.req_ready = 1'b1; - acc_resp_o.result = ara_resp_i.resp; - acc_resp_o.error = ara_resp_i.error; - acc_resp_o.resp_valid = 1'b1; - ara_req_valid_d = 1'b0; - end + if ( ara_resp_valid_i ) begin : ara_resp_valid + acc_resp_o.req_ready = 1'b1; + acc_resp_o.resp_valid = 1'b1; + acc_resp_o.result = ara_resp_i.resp; + acc_resp_o.exception = ara_resp_i.exception; + // Clear request to backend + ara_req_valid_d = 1'b0; + end : ara_resp_valid end 6'b011000: ara_req_d.op = ara_pkg::VMFEQ; 6'b011001: ara_req_d.op = ara_pkg::VMFLE; @@ -1938,96 +1944,95 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b01000: begin // Widening VFCVTXUF ara_req_d.op = VFCVTXUF; ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01001: begin // Widening VFCVTXF ara_req_d.op = VFCVTXF; ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01010: begin // Widening VFCVTFXU ara_req_d.op = VFCVTFXU; ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01011: begin // Widening VFCVTFX ara_req_d.op = VFCVTFX; ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = 
next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01100: begin // Widening VFCVTFF ara_req_d.op = VFCVTFF; ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01110: begin // Widening VFCVTRTZXUF ara_req_d.op = VFCVTRTZXUF; ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01111: begin // Widening VFCVTRTZXF ara_req_d.op = VFCVTRTZXF; ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b10000: begin // Narrowing VFCVTXUF ara_req_d.op = VFCVTXUF; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10001: begin // Narrowing VFCVTXF ara_req_d.op = VFCVTXF; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10010: begin // Narrowing VFCVTFXU ara_req_d.op = VFCVTFXU; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10011: begin // Narrowing VFCVTFX ara_req_d.op = VFCVTFX; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); 
end 5'b10100: begin // Narrowing VFCVTFF ara_req_d.op = VFCVTFF; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10101: begin // Narrowing VFNCVTRODFF ara_req_d.op = VFNCVTRODFF; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10110: begin // Narrowing VFCVTRTZXUF ara_req_d.op = VFCVTRTZXUF; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10111: begin // Narrowing VFCVTRTZXF ara_req_d.op = VFCVTRTZXF; ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); end default: begin // Trigger an error - acc_resp_o.error = 1'b1; - ara_req_valid_d = 1'b0; + illegal_insn = 1'b1; end endcase end @@ -2090,99 +2095,99 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b110000: begin // VFWADD ara_req_d.op = ara_pkg::VFADD; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; end 6'b110001: begin // VFWREDUSUM ara_req_d.op = ara_pkg::VFWREDUSUM; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueReductionZExt; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vs1 = vtype_q.vsew.next(); + ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = resize_e'(2'b00); end 6'b110010: begin // VFWSUB ara_req_d.op = ara_pkg::VFSUB; 
ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; end 6'b110011: begin // VFWREDOSUM ara_req_d.op = ara_pkg::VFWREDOSUM; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueReductionZExt; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vs1 = vtype_q.vsew.next(); + ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); ara_req_d.cvt_resize = resize_e'(2'b00); end 6'b110100: begin // VFWADD.W ara_req_d.op = ara_pkg::VFADD; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; end 6'b110110: begin // VFWSUB.W ara_req_d.op = ara_pkg::VFSUB; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; end 6'b111000: begin // VFWMUL ara_req_d.op = ara_pkg::VFMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - 
ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; end 6'b111100: begin // VFWMACC ara_req_d.op = ara_pkg::VFMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111101: begin // VFWNMACC ara_req_d.op = ara_pkg::VFNMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111110: begin // VFWMSAC ara_req_d.op = ara_pkg::VFMSAC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111111: begin // VFWNMSAC ara_req_d.op = ara_pkg::VFNMSAC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); 
ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase @@ -2238,7 +2243,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Instruction is invalid if the vtype is invalid - if (vtype_q.vill) illegal_insn = 1'b1; + if (csr_vtype_q.vill) illegal_insn = 1'b1; end else illegal_insn = 1'b1; // Vector FP instructions are disabled end @@ -2277,17 +2282,17 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001110: begin // vfslide1up ara_req_d.op = ara_pkg::VSLIDEUP; ara_req_d.stride = 1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling ara_req_d.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] || - (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; end 6'b001111: begin // vfslide1down ara_req_d.op = ara_pkg::VSLIDEDOWN; ara_req_d.stride = 1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req_d.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling ara_req_d.scale_vl = 1'b1; end @@ -2295,7 +2300,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // vmv.s.f ara_req_d.op = ara_pkg::VFMVSF; ara_req_d.use_vs2 = 1'b0; - ara_req_d.vl = |vl_q ? 1 : '0; + ara_req_d.vl = |csr_vl_q ? 
1 : '0; // This instruction ignores LMUL checks skip_lmul_checks = 1'b1; end @@ -2356,85 +2361,85 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b110000: begin // VFWADD ara_req_d.op = ara_pkg::VFADD; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; ara_req_d.wide_fp_imm = 1'b1; end 6'b110010: begin // VFWSUB ara_req_d.op = ara_pkg::VFSUB; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; ara_req_d.wide_fp_imm = 1'b1; end 6'b110100: begin // VFWADD.W ara_req_d.op = ara_pkg::VFADD; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.wide_fp_imm = 1'b1; end 6'b110110: begin // VFWSUB.W ara_req_d.op = ara_pkg::VFSUB; ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + lmul_vs2 = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); ara_req_d.wide_fp_imm = 1'b1; end 6'b111000: begin // VFWMUL ara_req_d.op = ara_pkg::VFMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - 
ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; ara_req_d.wide_fp_imm = 1'b1; end 6'b111100: begin // VFWMACC ara_req_d.op = ara_pkg::VFMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111101: begin // VFWNMACC ara_req_d.op = ara_pkg::VFNMACC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111110: begin // VFWMSAC ara_req_d.op = ara_pkg::VFMSAC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111111: begin // VFWNMSAC ara_req_d.op = ara_pkg::VFNMSAC; ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); + ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = 
vtype_q.vsew.next(); + ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase // Check if the FP scalar operand is NaN-boxed. If not, replace it with a NaN. - case (vtype_q.vsew) + case (csr_vtype_q.vsew) EW16: if (~(&acc_req_i.rs1[63:16])) ara_req_d.scalar_op = 64'h0000000000007e00; EW32: if (~(&acc_req_i.rs1[63:32])) ara_req_d.scalar_op = 64'h000000007fc00000; endcase @@ -2477,17 +2482,17 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Instruction is invalid if the vtype is invalid - if (vtype_q.vill) illegal_insn = 1'b1; + if (csr_vtype_q.vill) illegal_insn = 1'b1; end else illegal_insn = 1'b1; // Vector FP instructions are disabled end endcase - end + end : OpcodeVec //////////////////// // Vector Loads // //////////////////// - riscv::OpcodeLoadFp: begin + riscv::OpcodeLoadFp: begin : OpcodeLoadFp // Instruction is of one of the RVV types automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); @@ -2511,7 +2516,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW8; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW8; end end @@ -2519,7 +2524,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW16; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW16; end end @@ -2527,7 +2532,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW32; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW32; end end @@ -2535,15 +2540,15 @@ module ara_dispatcher import 
ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW64; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW64; end end default: begin // Invalid. Element is too wide, or encoding is non-existant. acc_resp_o.req_ready = 1'b1; - acc_resp_o.error = 1'b1; acc_resp_o.resp_valid = 1'b1; - ara_req_valid_d = 1'b0; + illegal_insn = 1'b1; + ara_req_valid_d = 1'b0; end endcase @@ -2558,19 +2563,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b01000:; // Unit-strided, whole registers 5'b01011: begin // Unit-strided, mask load, EEW=1 // We operate ceil(vl/8) bytes - ara_req_d.vl = (vl_q >> 3) + |vl_q[2:0]; + ara_req_d.vl = (csr_vl_q >> 3) + |csr_vl_q[2:0]; ara_req_d.vtype.vsew = EW8; end 5'b10000: begin // Unit-strided, fault-only first // TODO: Not implemented - illegal_insn = 1'b1; - acc_resp_o.req_ready = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_load = 1'b1; end default: begin // Reserved - illegal_insn = 1'b1; - acc_resp_o.req_ready = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_load = 1'b1; end endcase end @@ -2590,24 +2591,22 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // For memory operations: EMUL = LMUL * (EEW / SEW) // EEW is encoded in the instruction - ara_req_d.emul = vlmul_e'(vtype_q.vlmul + (ara_req_d.vtype.vsew - vtype_q.vsew)); + ara_req_d.emul = vlmul_e'(csr_vtype_q.vlmul + (ara_req_d.vtype.vsew - csr_vtype_q.vsew)); // Exception if EMUL > 8 or < 1/8 - unique case ({vtype_q.vlmul[2], ara_req_d.emul[2]}) + unique case ({csr_vtype_q.vlmul[2], ara_req_d.emul[2]}) // The new emul is lower than the previous lmul 2'b01: begin // But the new eew is greater than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) > 0) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + if (signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) > 0) begin + 
illegal_insn_load = 1'b1; end end // The new emul is greater than the previous lmul 2'b10: begin // But the new eew is lower than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) < 0) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + if (signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) < 0) begin + illegal_insn_load = 1'b1; end end default:; @@ -2617,20 +2616,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // access. unique case (ara_req_d.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_load = 1'b1; end LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_load = 1'b1; end LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_load = 1'b1; end LMUL_RSVD: begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_load = 1'b1; end default:; endcase @@ -2640,9 +2635,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Execute also if vl == 0 ignore_zero_vl_check = 1'b1; // The LMUL value is kept in the instruction itself - illegal_insn = 1'b0; - acc_resp_o.req_ready = 1'b0; - acc_resp_o.resp_valid = 1'b0; + illegal_insn_load = 1'b0; ara_req_valid_d = 1'b1; // Maximum vector length. VLMAX = nf * VLEN / EW8. 
@@ -2666,22 +2659,23 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end default: begin // Trigger an error for the reserved simm values - illegal_insn = 1'b1; + illegal_insn_load = 1'b1; end endcase end // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin + if ( ara_resp_valid_i ) begin : ara_resp_valid acc_resp_o.req_ready = 1'b1; - acc_resp_o.error = ara_resp_i.error; acc_resp_o.resp_valid = 1'b1; - ara_req_valid_d = 1'b0; - // In case of error, modify vstart - if (ara_resp_i.error) - vstart_d = ara_resp_i.error_vl; - end - end + acc_resp_o.exception = ara_resp_i.exception; + ara_req_valid_d = 1'b0; // Clear request to backend + // In case of exception, modify vstart + if ( ara_resp_i.exception.valid ) begin : exception + csr_vstart_d = ara_resp_i.exception_vstart; + end : exception + end : ara_resp_valid + end : OpcodeLoadFp ///////////////////// // Vector Stores // @@ -2693,7 +2687,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // The current vector length refers to the target EEW! // Vector stores never re-shuffle the source register! - riscv::OpcodeStoreFp: begin + riscv::OpcodeStoreFp: begin : OpcodeStoreFp // Instruction is of one of the RVV types automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); @@ -2724,7 +2718,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW8; // ara_req_d.vtype.vsew is the target EEW! 
end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW8; end end @@ -2732,7 +2726,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW16; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW16; end end @@ -2740,7 +2734,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW32; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW32; end end @@ -2748,15 +2742,12 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin ara_req_d.vtype.vsew = EW64; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; + ara_req_d.vtype.vsew = csr_vtype_q.vsew; ara_req_d.eew_vs2 = EW64; end end default: begin // Invalid. Element is too wide, or encoding is non-existant. 
- acc_resp_o.req_ready = 1'b1; - acc_resp_o.error = 1'b1; - acc_resp_o.resp_valid = 1'b1; - ara_req_valid_d = 1'b0; + illegal_insn = 1'b1; end endcase @@ -2771,13 +2762,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b01000:; // Unit-strided, whole registers 5'b01011: begin // Unit-strided, mask store, EEW=1 // We operate ceil(vl/8) bytes - ara_req_d.vl = (vl_q >> 3) + |vl_q[2:0]; + ara_req_d.vl = (csr_vl_q >> 3) + |csr_vl_q[2:0]; ara_req_d.vtype.vsew = EW8; end default: begin // Reserved - illegal_insn = 1'b1; - acc_resp_o.req_ready = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_store = 1'b1; end endcase end @@ -2797,24 +2786,22 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // For memory operations: EMUL = LMUL * (EEW / SEW) // EEW is encoded in the instruction - ara_req_d.emul = vlmul_e'(vtype_q.vlmul + (ara_req_d.vtype.vsew - vtype_q.vsew)); + ara_req_d.emul = vlmul_e'(csr_vtype_q.vlmul + (ara_req_d.vtype.vsew - csr_vtype_q.vsew)); // Exception if EMUL > 8 or < 1/8 - unique case ({vtype_q.vlmul[2], ara_req_d.emul[2]}) + unique case ({csr_vtype_q.vlmul[2], ara_req_d.emul[2]}) // The new emul is lower than the previous lmul 2'b01: begin // But the new eew is greater than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) > 0) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + if (signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) > 0) begin + illegal_insn_store = 1'b1; end end // The new emul is greater than the previous lmul 2'b10: begin // But the new eew is lower than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) < 0) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + if (signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) < 0) begin + illegal_insn_store = 1'b1; end end default:; @@ -2824,20 +2811,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // access. 
unique case (ara_req_d.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_store = 1'b1; end LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_store = 1'b1; end LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_store = 1'b1; end LMUL_RSVD: begin - illegal_insn = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn_store = 1'b1; end default:; endcase @@ -2869,227 +2852,309 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end default: begin // Trigger an error for the reserved simm values - illegal_insn = 1'b1; + illegal_insn_store = 1'b1; end endcase - illegal_insn = 1'b0; + // illegal_insn_store = 1'b0; // TODO: IS THIS A BUG? acc_resp_o.req_ready = 1'b0; acc_resp_o.resp_valid = 1'b0; ara_req_valid_d = 1'b1; end // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin + if ( ara_resp_valid_i ) begin : ara_resp_valid acc_resp_o.req_ready = 1'b1; - acc_resp_o.error = ara_resp_i.error; acc_resp_o.resp_valid = 1'b1; - ara_req_valid_d = 1'b0; - // If there is an error, change vstart - if (ara_resp_i.error) - vstart_d = ara_resp_i.error_vl; - end - end + acc_resp_o.exception = ara_resp_i.exception; + ara_req_valid_d = 1'b0; // Clear request to backend + // In case of exception, modify vstart + if ( ara_resp_i.exception.valid ) begin : exception + csr_vstart_d = ara_resp_i.exception_vstart; + end : exception + end : ara_resp_valid + end : OpcodeStoreFp //////////////////////////// // CSR Reads and Writes // //////////////////////////// - riscv::OpcodeSystem: begin - // These always respond at the same cycle - acc_resp_o.resp_valid = 1'b1; - is_config = 1'b1; - - unique case (acc_req_i.insn.itype.funct3) - 3'b001: begin // csrrw - // Decode the CSR. 
- case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) - // Only vstart can be written with CSR instructions. - riscv::CSR_VSTART: begin - vstart_d = acc_req_i.rs1; - acc_resp_o.result = vstart_q; - end - riscv::CSR_VXRM: begin - vxrm_d = vxrm_t'(acc_req_i.rs1[1:0]); - acc_resp_o.result = vlen_t'(vxrm_q); - end - riscv::CSR_VXSAT: begin - vxsat_d = vxsat_e'(acc_req_i.rs1[0]); - acc_resp_o.result = vlen_t'(vxsat_q); - end - default: acc_resp_o.error = 1'b1; - endcase - end - 3'b010: begin // csrrs - // Decode the CSR. - case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) - riscv::CSR_VSTART: begin - vstart_d = vstart_q | vlen_t'(acc_req_i.rs1); - acc_resp_o.result = vstart_q; - end - riscv::CSR_VTYPE: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(vtype_q); - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VL: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = vl_q; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VLENB: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VXRM: begin - vxrm_d = vxrm_q | vxrm_t'(acc_req_i.rs1[1:0]); - acc_resp_o.result = vlen_t'(vxrm_q); - end - riscv::CSR_VXSAT: begin - vxsat_d = vxsat_q | vxsat_e'(acc_req_i.rs1[0]); - acc_resp_o.result = vlen_t'(vxsat_q); - end - default: acc_resp_o.error = 1'b1; - endcase - end - 3'b011: begin // csrrc - // Decode the CSR. 
- case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) - riscv::CSR_VSTART: begin - vstart_d = vstart_q & ~vlen_t'(acc_req_i.rs1); - acc_resp_o.result = vstart_q; - end - riscv::CSR_VTYPE: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(vtype_q); - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VL: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = vl_q; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VLENB: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VXSAT: begin - vxsat_d = vxsat_q & ~vxsat_e'(acc_req_i.rs1[0]); - acc_resp_o.result = vxsat_q; - end - default: acc_resp_o.error = 1'b1; - endcase - end - 3'b101: begin // csrrwi - // Decode the CSR. - case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) - // Only vstart can be written with CSR instructions. - riscv::CSR_VSTART: begin - vstart_d = vlen_t'(acc_req_i.insn.itype.rs1); - acc_resp_o.result = vstart_q; - end - riscv::CSR_VXRM: begin - vxrm_d = vxrm_t'(acc_req_i.rs1[1:0]); - acc_resp_o.result = vlen_t'(vxrm_q); - end - riscv::CSR_VXSAT: begin - // logic [19:15] rs1; So, LSB is [15] - vxsat_d = acc_req_i.insn.itype.rs1[15]; - acc_resp_o.result = vxsat_q; - end - default: acc_resp_o.error = 1'b1; - endcase - end - 3'b110: begin // csrrsi - // Decode the CSR. 
- case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) - riscv::CSR_VSTART: begin - vstart_d = vstart_q | vlen_t'(acc_req_i.insn.itype.rs1); - acc_resp_o.result = vstart_q; - end - riscv::CSR_VTYPE: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(vtype_q); - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VL: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = vl_q; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VLENB: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VXSAT: begin - // logic [19:15] rs1; So, LSB is [15] - vxsat_d = vxsat_q | vxsat_e'(acc_req_i.insn.itype.rs1[15]); - acc_resp_o.result = vxsat_q; - end - default: acc_resp_o.error = 1'b1; - endcase - end - 3'b111: begin // csrrci - // Decode the CSR. - unique case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) - riscv::CSR_VSTART: begin - vstart_d = vstart_q & ~vlen_t'(acc_req_i.insn.itype.rs1); - acc_resp_o.result = vstart_q; - end - riscv::CSR_VTYPE: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(vtype_q); - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VL: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = vl_q; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VLENB: begin - // Only reads are allowed - if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; - else acc_resp_o.error = 1'b1; - end - riscv::CSR_VXSAT: begin - // logic [19:15] rs1; So, LSB is [15] - vxsat_d = vxsat_q & ~vxsat_e'(acc_req_i.insn.itype.rs1[15]); - acc_resp_o.result = vxsat_q; - end - default: acc_resp_o.error = 1'b1; - endcase - end - default: begin - // Trigger an illegal instruction - acc_resp_o.error = 1'b1; - acc_resp_o.resp_valid = 1'b1; - end - endcase - end + riscv::OpcodeSystem: begin : OpcodeSystem + // CSR ops 
have a semantic dependency on vector instructions.
+ // Therefore, Ara must be idle before performing any CSR operation.
+
+ // Stall if there is any pending vector instruction
+ // NOTE: This is overconstraining. Not all CSR ops actually need to stall if a vector instruction is pending.
+ // E.g., CSR vl is never updated by instructions past ara_dispatcher, except for "unit-stride fault-only-first loads". Reading vl would be safe otherwise.
+ // E.g., CSR vlenb is a design-constant parameter, reading is always safe.
+ // E.g., CSRs vxrm and vxsat have no influence on non-fixed-point instructions; they could be read and written safely when no fixed-point operation is running.
+ // By analyzing the spec more closely, more such optimizations can be made. For the sake of simplicity, the current implementation treats CSR ops as one block.
+ if ( ara_idle_i ) begin : ara_idle
+ // These always respond at the same cycle
+ acc_resp_o.resp_valid = 1'b1;
+ is_config = 1'b1;
+
+ unique case (acc_req_i.insn.itype.funct3)
+ 3'b001: begin // csrrw
+ // Decode the CSR.
+ case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm))
+ // Only vstart can be written with CSR instructions.
+ riscv::CSR_VSTART: begin
+ csr_vstart_d = acc_req_i.rs1;
+ acc_resp_o.result = csr_vstart_q;
+ end
+ riscv::CSR_VXRM: begin
+ csr_vxrm_d = vxrm_t'(acc_req_i.rs1[16:15]);
+ acc_resp_o.result = vlen_t'(csr_vxrm_q);
+ end
+ riscv::CSR_VXSAT: begin
+ csr_vxsat_d = vxsat_e'(acc_req_i.rs1[15]);
+ acc_resp_o.result = vlen_t'(csr_vxsat_q);
+ end
+ riscv::CSR_VCSR: begin
+ csr_vxrm_d = vxrm_t'( acc_req_i.rs1[17:16] );
+ csr_vxsat_d = vxsat_e'( acc_req_i.rs1[15] );
+ acc_resp_o.result = vlen_t'( { csr_vxrm_q, csr_vxsat_q } );
+ end
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ 3'b010: begin // csrrs
+ // Decode the CSR.
+ case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) + riscv::CSR_VSTART: begin + csr_vstart_d = csr_vstart_q | vlen_t'(acc_req_i.rs1); + acc_resp_o.result = csr_vstart_q; + end + riscv::CSR_VTYPE: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(csr_vtype_q); + else illegal_insn = 1'b1; + end + riscv::CSR_VL: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = csr_vl_q; + else illegal_insn = 1'b1; + end + riscv::CSR_VLENB: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; + else illegal_insn = 1'b1; + end + riscv::CSR_VXRM: begin + csr_vxrm_d = csr_vxrm_q | vxrm_t'(acc_req_i.rs1[16:15]); + acc_resp_o.result = vlen_t'(csr_vxrm_q); + end + riscv::CSR_VXSAT: begin + csr_vxsat_d = csr_vxsat_q | vxsat_e'(acc_req_i.rs1[15]); + acc_resp_o.result = vlen_t'(csr_vxsat_q); + end + riscv::CSR_VCSR: begin + csr_vxrm_d = csr_vxrm_q | vxrm_t'(acc_req_i.rs1[17:16]); + csr_vxsat_d = csr_vxsat_q | vxsat_e'(acc_req_i.rs1[15]); + acc_resp_o.result = vlen_t'( { csr_vxrm_q, csr_vxsat_q } ); + end + default: illegal_insn = 1'b1; + endcase + end + 3'b011: begin // csrrc + // Decode the CSR. 
+ case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) + riscv::CSR_VSTART: begin + csr_vstart_d = csr_vstart_q & ~vlen_t'(acc_req_i.rs1); + acc_resp_o.result = csr_vstart_q; + end + riscv::CSR_VTYPE: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(csr_vtype_q); + else illegal_insn = 1'b1; + end + riscv::CSR_VL: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = csr_vl_q; + else illegal_insn = 1'b1; + end + riscv::CSR_VLENB: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; + else illegal_insn = 1'b1; + end + riscv::CSR_VXSAT: begin + csr_vxsat_d = csr_vxsat_q & ~vxsat_e'(acc_req_i.rs1[0]); + acc_resp_o.result = csr_vxsat_q; + end + riscv::CSR_VXRM: begin + csr_vxrm_d = csr_vxrm_q & ~vxsat_e'(acc_req_i.rs1[1:0]); + acc_resp_o.result = csr_vxrm_q; + end + riscv::CSR_VCSR: begin + csr_vxrm_d = csr_vxrm_q & ~vxsat_e'(acc_req_i.rs1[2:1]); + csr_vxsat_d = csr_vxsat_q & ~vxsat_e'(acc_req_i.rs1[0]); + acc_resp_o.result = vlen_t'( { csr_vxrm_q, csr_vxsat_q } ); + end + default: illegal_insn = 1'b1; + endcase + end + 3'b101: begin // csrrwi + // Decode the CSR. + case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) + // Only vstart can be written with CSR instructions. + riscv::CSR_VSTART: begin + csr_vstart_d = vlen_t'(acc_req_i.rs1); + acc_resp_o.result = csr_vstart_q; + end + riscv::CSR_VXRM: begin + csr_vxrm_d = vxrm_t'(acc_req_i.rs1[1:0]); + acc_resp_o.result = vlen_t'(csr_vxrm_q); + end + riscv::CSR_VXSAT: begin + csr_vxsat_d = acc_req_i.rs1[0]; + acc_resp_o.result = csr_vxsat_q; + end + riscv::CSR_VCSR: begin + // logic [19:15] rs1; So, LSB is [15] + csr_vxrm_d = vxrm_t'(acc_req_i.rs1[2:1]); + csr_vxsat_d = vxsat_e'(acc_req_i.rs1[0]); + acc_resp_o.result = csr_vxsat_q; + end + default: illegal_insn = 1'b1; + endcase + end + 3'b110: begin // csrrsi + // Decode the CSR. 
+ case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) + riscv::CSR_VSTART: begin + csr_vstart_d = csr_vstart_q | vlen_t'(acc_req_i.rs1); + acc_resp_o.result = csr_vstart_q; + end + riscv::CSR_VTYPE: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(csr_vtype_q); + else illegal_insn = 1'b1; + end + riscv::CSR_VL: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = csr_vl_q; + else illegal_insn = 1'b1; + end + riscv::CSR_VLENB: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; + else illegal_insn = 1'b1; + end + riscv::CSR_VXSAT: begin + // logic [19:15] rs1; So, LSB is [15] + csr_vxsat_d = csr_vxsat_q | vxsat_e'(acc_req_i.rs1[0]); + acc_resp_o.result = csr_vxsat_q; + end + riscv::CSR_VXRM: begin + // logic [19:15] rs1; So, LSB is [15] + csr_vxrm_d = csr_vxrm_q | vxrm_t'(acc_req_i.rs1[1:0]); + acc_resp_o.result = csr_vxrm_q; + end + riscv::CSR_VCSR: begin + // logic [19:15] rs1; So, LSB is [15] + csr_vxrm_d = csr_vxrm_q | vxrm_t'(acc_req_i.rs1[2:1]); + csr_vxsat_d = csr_vxsat_q | vxsat_e'(acc_req_i.rs1[0]); + acc_resp_o.result = { csr_vxrm_q, csr_vxsat_q }; + end + default: illegal_insn = 1'b1; + endcase + end + 3'b111: begin // csrrci + // Decode the CSR. 
+ unique case (riscv::csr_addr_t'(acc_req_i.insn.itype.imm)) + riscv::CSR_VSTART: begin + csr_vstart_d = csr_vstart_q & ~vlen_t'(acc_req_i.rs1); + acc_resp_o.result = csr_vstart_q; + end + riscv::CSR_VTYPE: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = xlen_vtype(csr_vtype_q); + else illegal_insn = 1'b1; + end + riscv::CSR_VL: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = csr_vl_q; + else illegal_insn = 1'b1; + end + riscv::CSR_VLENB: begin + // Only reads are allowed + if (acc_req_i.insn.itype.rs1 == '0) acc_resp_o.result = VLENB; + else illegal_insn = 1'b1; + end + riscv::CSR_VXSAT: begin + csr_vxsat_d = csr_vxsat_q & ~vxsat_e'(acc_req_i.rs1[0]); + acc_resp_o.result = csr_vxsat_q; + end + riscv::CSR_VXRM: begin + csr_vxrm_d = csr_vxrm_q & ~vxsat_e'(acc_req_i.rs1[1:0]); + acc_resp_o.result = csr_vxrm_q; + end + riscv::CSR_VCSR: begin + // logic [19:15] rs1; So, LSB is [15] + csr_vxrm_d = csr_vxrm_q & ~vxrm_t'(acc_req_i.rs1[2:1]); + csr_vxsat_d = csr_vxsat_q & ~vxsat_e'(acc_req_i.rs1[0]); + acc_resp_o.result = { csr_vxrm_q, csr_vxsat_q }; + end + default: illegal_insn= 1'b1; + endcase + end + default: begin + // Trigger an illegal instruction + illegal_insn = 1'b1; + end + endcase // acc_req_i.insn.itype.funct3 + end : ara_idle + else begin : csr_stall + acc_resp_o.req_ready = 1'b0; + end : csr_stall + end : OpcodeSystem default: begin // Trigger an illegal instruction - acc_resp_o.error = 1'b1; - acc_resp_o.resp_valid = 1'b1; + illegal_insn = 1'b1; end - endcase - end + + endcase // acc_req_i.insn.itype.opcode + end : ready // Check that we have fixed-point support if requested // vxsat and vxrm are always accessible anyway - if (ara_req_valid_d && (ara_req_d.op inside {[VSADDU:VNCLIPU], VSMUL}) && (FixPtSupport == FixedPointDisable)) + if (ara_req_valid_d && (ara_req_d.op inside {[VSADDU:VNCLIPU], VSMUL}) && (FixPtSupport == FixedPointDisable)) begin : fixed_point_check 
illegal_insn = 1'b1; + end : fixed_point_check // Check that we have we have vfrec7, vfrsqrt7 - if (ara_req_valid_d && (ara_req_d.op inside {VFREC7, VFRSQRT7}) && (FPExtSupport == FPExtSupportDisable)) + if (ara_req_valid_d && (ara_req_d.op inside {VFREC7, VFRSQRT7}) && (FPExtSupport == FPExtSupportDisable)) begin : vfrec7_vfrsqrt7_support_check illegal_insn = 1'b1; + end : vfrec7_vfrsqrt7_support_check + + + // Raise an illegal instruction exception + if ( illegal_insn || illegal_insn_load || illegal_insn_store ) begin : illegal_instruction + ara_req_valid_d = 1'b0; + acc_resp_o.req_ready = 1'b1; + acc_resp_o.resp_valid = 1'b1; + acc_resp_o.exception.valid = 1'b1; + acc_resp_o.exception.cause = riscv::ILLEGAL_INSTR; + acc_resp_o.exception.tval = acc_req_i.insn; + end : illegal_instruction + + // Reset vstart to zero for successful vector instructions + // Corner cases: + // * vstart exception reporting, e.g., VLSU, is handled above + // * CSR operations are not considered vector instructions + if ( acc_resp_o.resp_valid + & !acc_resp_o.exception.valid + & (acc_req_i.insn.itype.opcode != riscv::OpcodeSystem) + ) begin : reset_vstart + csr_vstart_d = '0; + end : reset_vstart // Check if we need to reshuffle our vector registers involved in the operation // This operation is costly when occurs, so avoid it if possible - if (ara_req_valid_d && !acc_resp_o.error) begin + if ( ara_req_valid_d && !acc_resp_o.exception.valid ) begin : check_reshuffle automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); // Is the instruction an in-lane one and could it be subject to reshuffling? @@ -3100,7 +3165,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Optimization: reshuffle vd only if we are not overwriting the whole vector register! 
reshuffle_req_d = {ara_req_d.use_vs1 && (ara_req_d.eew_vs1 != eew_q[ara_req_d.vs1]) && eew_valid_q[ara_req_d.vs1] && in_lane_op, ara_req_d.use_vs2 && (ara_req_d.eew_vs2 != eew_q[ara_req_d.vs2]) && eew_valid_q[ara_req_d.vs2] && in_lane_op, - ara_req_d.use_vd && (ara_req_d.vtype.vsew != eew_q[ara_req_d.vd ]) && eew_valid_q[ara_req_d.vd ] && vl_q != (VLENB >> ara_req_d.vtype.vsew)}; + ara_req_d.use_vd && (ara_req_d.vtype.vsew != eew_q[ara_req_d.vd ]) && eew_valid_q[ara_req_d.vd ] && csr_vl_q != (VLENB >> ara_req_d.vtype.vsew)}; // Prepare the information to reshuffle the vector registers during the next cycles // Reshuffle in the following order: vd, v2, v1. The order is arbitrary. @@ -3122,7 +3187,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end default:; endcase - end + end : check_reshuffle // Reshuffle if at least one of the three registers needs a reshuffle if (|reshuffle_req_d) begin @@ -3145,13 +3210,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Reshuffle state_d = RESHUFFLE; end - end - - // Raise an illegal instruction exception - if (illegal_insn) begin - acc_resp_o.error = 1'b1; - ara_req_valid_d = 1'b0; - end + end : not_reshuffling // Update the EEW if (ara_req_valid_d && ara_req_d.use_vd && ara_req_ready_i) begin @@ -3191,8 +3250,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Any valid non-config instruction is a NOP if vl == 0, with some exceptions, // e.g. 
whole vector memory operations / whole vector register move - if (is_decoding && (vl_q == '0 || null_vslideup) && !is_config && - !ignore_zero_vl_check && !acc_resp_o.error) begin + if (is_decoding && (csr_vl_q == '0 || null_vslideup) && !is_config && + !ignore_zero_vl_check && !acc_resp_o.exception.valid) begin // If we are acknowledging a memory operation, we must tell Ariane that the memory // operation was resolved (to decrement its pending load/store counter) // This can collide with the same signal from the vector load/store unit, so we must diff --git a/hardware/src/ara_sequencer.sv b/hardware/src/ara_sequencer.sv index 5fb0abff1..74fce4573 100644 --- a/hardware/src/ara_sequencer.sv +++ b/hardware/src/ara_sequencer.sv @@ -40,8 +40,8 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i output logic pe_scalar_resp_ready_o, // Interface with the Address Generation input logic addrgen_ack_i, - input logic addrgen_error_i, - input vlen_t addrgen_error_vl_i + input ariane_pkg::exception_t addrgen_exception_i, + input vlen_t addrgen_exception_vstart_i ); /////////////////////////////////// @@ -438,8 +438,8 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i state_d = IDLE; ara_req_ready_o = 1'b1; ara_resp_valid_o = 1'b1; - ara_resp_o.error = addrgen_error_i; - ara_resp_o.error_vl = addrgen_error_vl_i; + ara_resp_o.exception = addrgen_exception_i; + ara_resp_o.exception_vstart = addrgen_exception_vstart_i; end // Wait for the scalar result diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index f9ed43709..d57226e7d 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -26,15 +26,33 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( output axi_aw_t axi_aw_o, output logic axi_aw_valid_o, input logic axi_aw_ready_i, + // CSR input + input logic en_ld_st_translation_i, + // Interface with CVA6's sv39 MMU + // This is everything the MMU can provide, it 
might be overcomplete for Ara and some signals be useless + output ariane_pkg::exception_t mmu_misaligned_ex_o, + output logic mmu_req_o, // request address translation + output logic [riscv::VLEN-1:0] mmu_addr_o, // virtual address out + output logic mmu_is_store_o, // the translation is requested by a store + // if we need to walk the page table we can't grant in the same cycle + // Cycle 0 + input logic mmu_dtlb_hit_i, // sent in the same cycle as the request if translation hits in the DTLB + input logic [riscv::PPNW-1:0] mmu_dtlb_ppn_i, // ppn (send same cycle as hit) + // Cycle 1 + input logic mmu_valid_i, // translation is valid + input logic [riscv::PLEN-1:0] mmu_paddr_i, // translated address + input ariane_pkg::exception_t mmu_exception_i, // address translation threw an exception // Interace with the dispatcher input logic core_st_pending_i, // Interface with the main sequencer input pe_req_t pe_req_i, input logic pe_req_valid_i, input logic [NrVInsn-1:0] pe_vinsn_running_i, - output logic addrgen_error_o, + output ariane_pkg::exception_t addrgen_exception_o, output logic addrgen_ack_o, - output vlen_t addrgen_error_vl_o, + output vlen_t addrgen_exception_vstart_o, + output logic addrgen_exception_load_o, + output logic addrgen_exception_store_o, // Interface with the load/store units output addrgen_axi_req_t axi_addrgen_req_o, output logic axi_addrgen_req_valid_o, @@ -47,6 +65,13 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( output logic addrgen_operand_ready_o ); + + /////////////////// + // Assignments // + /////////////////// + + assign mmu_misaligned_ex_o = '0; // Ara reports misaligned exceptions on its own + import cf_math_pkg::idx_width; import axi_pkg::aligned_addr; import axi_pkg::BURST_INCR; @@ -117,7 +142,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( axi_addr_t idx_final_addr_d, idx_final_addr_q; elen_t idx_addr; logic idx_op_error_d, idx_op_error_q; - vlen_t addrgen_error_vl_d; + vlen_t addrgen_exception_vstart_d; 
// Pointer to point to the correct logic [$clog2(NrLanes)-1:0] word_lane_ptr_d, word_lane_ptr_q; @@ -156,13 +181,18 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // ADDRGEN_IDX_OP: Generates a series of AXI requests from a // vector instruction, but reading a vector of offsets from Ara's lanes. // This is used for scatter and gather operations. - enum logic [1:0] { + // WAIT_LAST_TRANSLATION: Wait for the last address translation to be acknowledged + enum logic [2:0] { IDLE, ADDRGEN, ADDRGEN_IDX_OP, - ADDRGEN_IDX_OP_END + ADDRGEN_IDX_OP_END, + WAIT_LAST_TRANSLATION } state_q, state_d; + // TODO: Masked elements do not generate exceptions on: + // * EEW misalignment + // * page faults always_comb begin: addr_generation // Maintain state state_d = state_q; @@ -177,16 +207,20 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Nothing to acknowledge addrgen_ack_o = 1'b0; - addrgen_error_o = 1'b0; + addrgen_exception_o.valid = 1'b0; + addrgen_exception_o.tval = '0; + addrgen_exception_o.cause = '0; + addrgen_exception_load_o = 1'b0; + addrgen_exception_store_o = 1'b0; // No valid words for the spill register - idx_addr_valid_d = 1'b0; + idx_addr_valid_d = 1'b0; addrgen_operand_ready_o = 1'b0; reduced_word = '0; elm_ptr_d = elm_ptr_q; idx_op_cnt_d = idx_op_cnt_q; word_lane_ptr_d = word_lane_ptr_q; - idx_final_addr_d = idx_final_addr_q; + idx_final_addr_d = idx_final_addr_q; last_elm_subw_d = last_elm_subw_q; // Support for indexed operations @@ -204,10 +238,10 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( idx_addr = reduced_word; case (state_q) - IDLE: begin + IDLE: begin : state_IDLE // Received a new request if (pe_req_valid_i && - (is_load(pe_req_i.op) || is_store(pe_req_i.op)) && !vinsn_running_q[pe_req_i.id]) begin + (is_load(pe_req_i.op) || is_store(pe_req_i.op)) && !vinsn_running_q[pe_req_i.id]) begin : pe_req_valid // Mark the instruction as running in this unit vinsn_running_d[pe_req_i.id] = 1'b1; @@ -232,19 +266,24 @@ module 
addrgen import ara_pkg::*; import rvv_pkg::*; #( idx_op_cnt_d = pe_req_i.vl; end default: state_d = ADDRGEN; - endcase - end - end - ADDRGEN: begin + endcase // pe_req_i.op + end : pe_req_valid + end : state_IDLE + + ADDRGEN: begin : ADDRGEN // Ara does not support misaligned AXI requests - if (is_addr_error(pe_req_q.scalar_op, pe_req_q.vtype.vsew)) begin + if (is_addr_error(pe_req_q.scalar_op, pe_req_q.vtype.vsew)) begin : eew_misaligned_error state_d = IDLE; addrgen_ack_o = 1'b1; - addrgen_error_o = 1'b1; - end else begin + addrgen_exception_o.valid = 1'b1; + addrgen_exception_o.cause = riscv::ILLEGAL_INSTR; + addrgen_exception_o.tval = '0; + end : eew_misaligned_error + else begin : address_valid + addrgen_req = '{ addr : pe_req_q.scalar_op, - len : pe_req_q.vl, + len : pe_req_q.vl , stride : pe_req_q.stride, vew : pe_req_q.vtype.vsew, is_load : is_load(pe_req_q.op), @@ -253,19 +292,20 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( }; addrgen_req_valid = 1'b1; - if (addrgen_req_ready) begin + if (addrgen_req_ready) begin : finished addrgen_req_valid = '0; addrgen_ack_o = 1'b1; state_d = IDLE; - end - end - end - ADDRGEN_IDX_OP: begin + end : finished + end : address_valid + end : ADDRGEN + + ADDRGEN_IDX_OP: begin : ADDRGEN_IDX_OP // Stall the interface until the operation is over to catch possible exceptions // Every address can generate an exception addrgen_req = '{ - addr : pe_req_q.scalar_op, + addr : pe_req_q.scalar_op, len : pe_req_q.vl, stride : pe_req_q.stride, vew : pe_req_q.vtype.vsew, @@ -339,13 +379,13 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( end end - if (idx_op_error_d || addrgen_req_ready) begin + if (idx_op_error_d || addrgen_req_ready ) begin state_d = ADDRGEN_IDX_OP_END; end - end + end : ADDRGEN_IDX_OP // This state exists not to create combinatorial paths on the interface - ADDRGEN_IDX_OP_END : begin + ADDRGEN_IDX_OP_END : begin : ADDRGEN_IDX_OP_END // Acknowledge the indexed memory operation addrgen_ack_o = 1'b1; 
addrgen_req_valid = '0; @@ -355,11 +395,20 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( word_lane_ptr_d = '0; // Raise an error if necessary if (idx_op_error_q) begin - addrgen_error_o = 1'b1; + // In this case, we always get EEW-misaligned exceptions + addrgen_exception_o.valid = 1'b1; + addrgen_exception_o.cause = riscv::ILLEGAL_INSTR; + addrgen_exception_o.tval = '0; end - end - endcase - end + end : ADDRGEN_IDX_OP_END + endcase // state_q + + if ( addrgen_exception_o.valid & addrgen_ack_o ) begin + addrgen_exception_load_o = is_load(pe_req_q.op); + addrgen_exception_store_o = !is_load(pe_req_q.op); + end + + end : addr_generation always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin @@ -371,7 +420,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( idx_op_cnt_q <= '0; last_elm_subw_q <= '0; idx_op_error_q <= '0; - addrgen_error_vl_o <= '0; + addrgen_exception_vstart_o <= '0; end else begin state_q <= state_d; pe_req_q <= pe_req_d; @@ -381,7 +430,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( idx_op_cnt_q <= idx_op_cnt_d; last_elm_subw_q <= last_elm_subw_d; idx_op_error_q <= idx_op_error_d; - addrgen_error_vl_o <= addrgen_error_vl_d; + addrgen_exception_vstart_o <= addrgen_exception_vstart_d; end end @@ -424,11 +473,12 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // AXI Request Generation // ////////////////////////////// - enum logic [1:0] { + enum logic [2:0] { AXI_ADDRGEN_IDLE, AXI_ADDRGEN_AXI_DW_STORE_MISALIGNED, // Misaligned vector store to AxiDataWidth/8, needs special treatement AXI_ADDRGEN_WAITING_CORE_STORE_PENDING, // Wait until (core_st_pending_i == 0) - AXI_ADDRGEN_REQUESTING // Perform AW/AR transactions and push addrgen_req to VSTU/VLDU + AXI_ADDRGEN_REQUESTING, // Perform AW/AR transactions and push addrgen_req to VSTU/VLDU + AXI_ADDRGEN_WAIT_TRANSLATION // Wait for MMU to ack back } axi_addrgen_state_d, axi_addrgen_state_q; axi_addr_t aligned_start_addr_d, aligned_start_addr_q; 
@@ -441,7 +491,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( logic [clog2_AxiStrobeWidth:0] eff_axi_dw_d, eff_axi_dw_q; logic [idx_width(clog2_AxiStrobeWidth):0] eff_axi_dw_log_d, eff_axi_dw_log_q; - function automatic set_end_addr ( + function automatic void set_end_addr ( input logic [($bits(axi_addr_t) - 12)-1:0] next_2page_msb, input int unsigned num_bytes, input axi_addr_t addr, @@ -470,7 +520,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( aligned_end_addr_d = {aligned_start_addr_d[AxiAddrWidth-1:12], 12'hFFF}; aligned_next_start_addr_d = { next_2page_msb , 12'h000}; end - endfunction + endfunction // set_end_addr always_comb begin: axi_addrgen // Maintain state @@ -487,7 +537,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( eff_axi_dw_log_d = eff_axi_dw_log_q; idx_addr_ready_d = 1'b0; - addrgen_error_vl_d = '0; + addrgen_exception_vstart_d = '0; // No error by default idx_op_error_d = 1'b0; @@ -505,14 +555,23 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( axi_aw_o = '0; axi_aw_valid_o = 1'b0; - case (axi_addrgen_state_q) - AXI_ADDRGEN_IDLE: begin + // MMU + mmu_req_o = 1'b0; + mmu_addr_o = '0; + mmu_is_store_o = 1'b0; + + // For addrgen FSM + last_translation_completed = 1'b0; + + case (axi_addrgen_state_q) + AXI_ADDRGEN_IDLE: begin : axi_addrgen_state_AXI_ADDRGEN_IDLE if (addrgen_req_valid) begin axi_addrgen_d = addrgen_req; axi_addrgen_state_d = core_st_pending_i ? 
AXI_ADDRGEN_WAITING_CORE_STORE_PENDING : AXI_ADDRGEN_REQUESTING; // In case of a misaligned store, reduce the effective width of the AXI transaction, // since the store unit does not support misalignments between the AXI bus and the lanes + // BUG: this address check is not valid for indexed operations if ((axi_addrgen_d.addr[clog2_AxiStrobeWidth-1:0] != '0) && !axi_addrgen_d.is_load) begin // Calculate the start and the end addresses in the AXI_ADDRGEN_AXI_DW_STORE_MISALIGNED state @@ -542,10 +601,10 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( aligned_end_addr_d, aligned_next_start_addr_d ); - end - end - AXI_ADDRGEN_AXI_DW_STORE_MISALIGNED: begin + end : axi_addrgen_state_AXI_ADDRGEN_IDLE + + AXI_ADDRGEN_AXI_DW_STORE_MISALIGNED: begin : axi_addrgen_state_AXI_ADDRGEN_AXI_DW_STORE_MISALIGNED axi_addrgen_state_d = core_st_pending_i ? AXI_ADDRGEN_WAITING_CORE_STORE_PENDING : AXI_ADDRGEN_REQUESTING; // The start address is found by aligning the original request address by the width of @@ -561,15 +620,16 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( aligned_start_addr_d, aligned_end_addr_d, aligned_next_start_addr_d - ); - - end - AXI_ADDRGEN_WAITING_CORE_STORE_PENDING: begin + ); + end : axi_addrgen_state_AXI_ADDRGEN_AXI_DW_STORE_MISALIGNED + + AXI_ADDRGEN_WAITING_CORE_STORE_PENDING: begin : axi_addrgen_state_AXI_ADDRGEN_WAITING_CORE_STORE_PENDING if (!core_st_pending_i) begin axi_addrgen_state_d = AXI_ADDRGEN_REQUESTING; end - end - AXI_ADDRGEN_REQUESTING : begin + end : axi_addrgen_state_AXI_ADDRGEN_WAITING_CORE_STORE_PENDING + + AXI_ADDRGEN_REQUESTING : begin : axi_addrgen_state_AXI_ADDRGEN_REQUESTING automatic logic axi_ax_ready = (axi_addrgen_q.is_load && axi_ar_ready_i) || (!axi_addrgen_q.is_load && axi_aw_ready_i); // Pre-calculate the next_2page_msb. 
This should not require much energy if the addr @@ -584,9 +644,9 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( (axi_addrgen_req_o.is_load && axi_addrgen_q.is_load) || (~axi_addrgen_req_o.is_load && ~axi_addrgen_q.is_load ) - ) begin - if (!axi_addrgen_queue_full && axi_ax_ready) begin - if (axi_addrgen_q.is_burst) begin + ) begin : axi_ax_idle + if (!axi_addrgen_queue_full && axi_ax_ready) begin : start_req + if (axi_addrgen_q.is_burst) begin : unit_stride ///////////////////////// // Unit-Stride access // @@ -656,12 +716,6 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( end axi_addrgen_d.addr = aligned_next_start_addr_q; - // Finished generating AXI requests - if (axi_addrgen_d.len == 0) begin - addrgen_req_ready = 1'b1; - axi_addrgen_state_d = AXI_ADDRGEN_IDLE; - end - // Calculate the addresses for the next iteration // The start address is found by aligning the original request address by the width of // the memory interface. In our case, we have it already. @@ -680,8 +734,8 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( aligned_end_addr_d, aligned_next_start_addr_d ); - - end else if (state_q != ADDRGEN_IDX_OP) begin + end : unit_stride + else if (state_q != ADDRGEN_IDX_OP) begin : strided ///////////////////// // Strided access // @@ -724,82 +778,78 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Account for the requested operands axi_addrgen_d.len = axi_addrgen_q.len - 1; // Calculate the addresses for the next iteration, adding the correct stride - // NOTE: there is no need to check for misaligned erros, since the stride is alsways EEW aligned to the first address + // NOTE: there is no need to check for misaligned erros, since the stride always produces EEW-aligned to the first address axi_addrgen_d.addr = axi_addrgen_q.addr + axi_addrgen_q.stride; - - // Finished generating AXI requests - if (axi_addrgen_d.len == 0) begin - addrgen_req_ready = 1'b1; - axi_addrgen_state_d = AXI_ADDRGEN_IDLE; - end - end 
else begin - + end : strided + else begin : indexed ////////////////////// // Indexed access // ////////////////////// + // TODO: check if idx_addr_valid_q is stable + if (idx_addr_valid_q) begin : idx_addr_valid_q - if (idx_addr_valid_q) begin - // We consumed a word - idx_addr_ready_d = 1'b1; - - // AR Channel - if (axi_addrgen_q.is_load) begin - axi_ar_o = '{ - addr : idx_final_addr_q, - len : 0, - size : axi_addrgen_q.vew, - cache : CACHE_MODIFIABLE, - burst : BURST_INCR, - default: '0 - }; - axi_ar_valid_o = 1'b1; - end - // AW Channel - else begin - axi_aw_o = '{ - addr : idx_final_addr_q, - len : 0, - size : axi_addrgen_q.vew, - cache : CACHE_MODIFIABLE, - burst : BURST_INCR, - default: '0 - }; - axi_aw_valid_o = 1'b1; - end - - // Send this request to the load/store units - axi_addrgen_queue = '{ - addr : idx_final_addr_q, - size : axi_addrgen_q.vew, - len : 0, - is_load: axi_addrgen_q.is_load - }; - axi_addrgen_queue_push = 1'b1; - - // Account for the requested operands - axi_addrgen_d.len = axi_addrgen_q.len - 1; - - // Check if the address does generate an exception - if (is_addr_error(idx_final_addr_q, axi_addrgen_q.vew)) begin + // Check if the virtual address generates an exception + if (is_addr_error(idx_final_addr_q, axi_addrgen_q.vew)) begin : eew_misaligned_error // Generate an error idx_op_error_d = 1'b1; // Forward next vstart info to the dispatcher - addrgen_error_vl_d = addrgen_req.len - axi_addrgen_q.len - 1; + addrgen_exception_vstart_d = addrgen_req.len - axi_addrgen_q.len - 1; addrgen_req_ready = 1'b1; axi_addrgen_state_d = AXI_ADDRGEN_IDLE; - end - - // Finished generating AXI requests - if (axi_addrgen_d.len == 0) begin - addrgen_req_ready = 1'b1; - axi_addrgen_state_d = AXI_ADDRGEN_IDLE; - end - end + end : eew_misaligned_error + else begin : aligned_address + // We consumed a word + idx_addr_ready_d = 1'b1; + + // AR Channel + if (axi_addrgen_q.is_load) begin + axi_ar_o = '{ + addr : idx_final_addr_q, + len : 0, + size : 
axi_addrgen_q.vew, + cache : CACHE_MODIFIABLE, + burst : BURST_INCR, + default: '0 + }; + axi_ar_valid_o = 1'b1; + end + // AW Channel + else begin + axi_aw_o = '{ + addr : idx_final_addr_q, + len : 0, + size : axi_addrgen_q.vew, + cache : CACHE_MODIFIABLE, + burst : BURST_INCR, + default: '0 + }; + axi_aw_valid_o = 1'b1; + end + + // Send this request to the load/store units + axi_addrgen_queue = '{ + addr : idx_final_addr_q, + size : axi_addrgen_q.vew, + len : 0, + is_load: axi_addrgen_q.is_load + }; + axi_addrgen_queue_push = 1'b1; + + // Account for the requested operands + axi_addrgen_d.len = axi_addrgen_q.len - 1; + end : aligned_address + end : idx_addr_valid_q + end : indexed + + // Finished generating AXI requests + if (axi_addrgen_d.len == 0) begin + addrgen_req_ready = 1'b1; + axi_addrgen_state_d = AXI_ADDRGEN_IDLE; end - end - end - end - endcase + end : start_req + end : axi_ax_idle + end : axi_addrgen_state_AXI_ADDRGEN_REQUESTING + endcase // axi_addrgen_state_q end: axi_addrgen always_ff @(posedge clk_i or negedge rst_ni) begin diff --git a/hardware/src/vlsu/vldu.sv b/hardware/src/vlsu/vldu.sv index 51042ed8e..7c49f3af6 100644 --- a/hardware/src/vlsu/vldu.sv +++ b/hardware/src/vlsu/vldu.sv @@ -35,6 +35,7 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( output pe_resp_t pe_resp_o, // Interface with the address generator input addrgen_axi_req_t axi_addrgen_req_i, + input logic addrgen_exception_valid_i, input logic axi_addrgen_req_valid_i, output logic axi_addrgen_req_ready_o, // Interface with the lanes @@ -136,7 +137,7 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( // reading from and writing into the lanes (read_pnt). logic [idx_width(ResultQueueDepth)-1:0] result_queue_write_pnt_d, result_queue_write_pnt_q; logic [idx_width(ResultQueueDepth)-1:0] result_queue_read_pnt_d, result_queue_read_pnt_q; - // We need to count how many valid elements are there in this result queue. 
+ // We need to count how many valid elements (payload_t) are there in this result queue. logic [idx_width(ResultQueueDepth):0] result_queue_cnt_d, result_queue_cnt_q; // Vector to register the final grants from the operand requesters, which indicate // that the result was actually written in the VRF (while the normal grant just says @@ -174,33 +175,33 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( logic [NrVInsn-1:0] vinsn_running_d, vinsn_running_q; // Interface with the main sequencer - pe_resp_t pe_resp; + pe_resp_t pe_resp_d; // Remaining bytes of the current instruction in the issue phase - vlen_t issue_cnt_d, issue_cnt_q; + vlen_t issue_cnt_bytes_d, issue_cnt_bytes_q; // Remaining bytes of the current instruction in the commit phase - vlen_t commit_cnt_d, commit_cnt_q; + vlen_t commit_cnt_bytes_d, commit_cnt_bytes_q; // Pointers // // We need several pointers to copy data from the memory interface // into the VRF. Namely, we need: // - A counter of how many beats are left in the current AXI burst - axi_pkg::len_t len_d, len_q; + axi_pkg::len_t axi_len_d, axi_len_q; // - A pointer to which byte in the current R beat we are reading data from. - logic [idx_width(AxiDataWidth/8):0] r_pnt_d, r_pnt_q; + logic [idx_width(AxiDataWidth/8):0] axi_r_byte_pnt_d, axi_r_byte_pnt_q; // - A pointer to which byte in the full VRF word we are writing data into. 
- logic [idx_width(DataWidth*NrLanes/8):0] vrf_pnt_d, vrf_pnt_q; + logic [idx_width(DataWidth*NrLanes/8):0] vrf_word_byte_pnt_d, vrf_word_byte_pnt_q; always_comb begin: p_vldu // Maintain state vinsn_queue_d = vinsn_queue_q; - issue_cnt_d = issue_cnt_q; - commit_cnt_d = commit_cnt_q; + issue_cnt_bytes_d = issue_cnt_bytes_q; + commit_cnt_bytes_d = commit_cnt_bytes_q; - len_d = len_q; - r_pnt_d = r_pnt_q; - vrf_pnt_d = vrf_pnt_q; + axi_len_d = axi_len_q; + axi_r_byte_pnt_d = axi_r_byte_pnt_q; + vrf_word_byte_pnt_d = vrf_word_byte_pnt_q; result_queue_d = result_queue_q; result_queue_valid_d = result_queue_valid_q; @@ -215,7 +216,7 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( // We are not ready, by default axi_addrgen_req_ready_o = 1'b0; - pe_resp = '0; + pe_resp_d = '0; axi_r_ready_o = 1'b0; mask_ready_o = 1'b0; load_complete_o = 1'b0; @@ -236,40 +237,46 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( // Bytes valid in the current R beat // If non-unit strided load, we do not progress within the beat automatic shortint unsigned lower_byte = beat_lower_byte(axi_addrgen_req_i.addr, - axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, len_q); + axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, axi_len_q); automatic shortint unsigned upper_byte = beat_upper_byte(axi_addrgen_req_i.addr, - axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, len_q); - + axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, axi_len_q); + // Is there a vector instruction ready to be issued? // Do we have the operands for it? 
- if (vinsn_issue_valid && (vinsn_issue_q.vm || (|mask_valid_i))) begin + if (vinsn_issue_valid && (vinsn_issue_q.vm || (|mask_valid_i))) begin : operands_valid + // TODO: add vstart here (use issue/commit_cnt_bytes_q) // Account for the issued bytes // How many bytes are valid in this VRF word - automatic vlen_t vrf_valid_bytes = NrLanes * 8 - vrf_pnt_q; + automatic vlen_t vrf_valid_bytes = (NrLanes * 8) - vrf_word_byte_pnt_q; // How many bytes are valid in this instruction - automatic vlen_t vinsn_valid_bytes = issue_cnt_q - vrf_pnt_q; + automatic vlen_t vinsn_valid_bytes = issue_cnt_bytes_q - vrf_word_byte_pnt_q; // How many bytes are valid in this AXI word - automatic vlen_t axi_valid_bytes = upper_byte - lower_byte - r_pnt_q + 1; + automatic vlen_t axi_valid_bytes = upper_byte - lower_byte - axi_r_byte_pnt_q + 1; // How many bytes are we committing? automatic logic [idx_width(DataWidth*NrLanes/8):0] valid_bytes; - valid_bytes = issue_cnt_q < NrLanes * 8 ? vinsn_valid_bytes : vrf_valid_bytes; - valid_bytes = valid_bytes < axi_valid_bytes ? valid_bytes : axi_valid_bytes; + valid_bytes = ( issue_cnt_bytes_q < (NrLanes * 8) ) ? vinsn_valid_bytes : vrf_valid_bytes; + // valid_bytes = ( valid_bytes < axi_valid_bytes ) ? valid_bytes : axi_valid_bytes; + if ( valid_bytes >= axi_valid_bytes ) begin : valid_bytes_overflow + valid_bytes = axi_valid_bytes; + end : valid_bytes_overflow - r_pnt_d = r_pnt_q + valid_bytes; - vrf_pnt_d = vrf_pnt_q + valid_bytes; + axi_r_byte_pnt_d = axi_r_byte_pnt_q + valid_bytes; + vrf_word_byte_pnt_d = vrf_word_byte_pnt_q + valid_bytes; // Copy data from the R channel into the result queue - for (int axi_byte = 0; axi_byte < AxiDataWidth/8; axi_byte++) begin + for (int axi_byte = 0; axi_byte < AxiDataWidth/8; axi_byte++) begin : axi_r_to_result_queue // Is this byte a valid byte in the R beat? 
- if (axi_byte >= lower_byte + r_pnt_q && axi_byte <= upper_byte) begin + if ( ( axi_byte >= ( lower_byte + axi_r_byte_pnt_q ) ) && + ( axi_byte <= upper_byte ) + ) begin : is_axi_r_byte // Map axi_byte to the corresponding byte in the VRF word (sequential) - automatic int vrf_seq_byte = axi_byte - lower_byte - r_pnt_q + vrf_pnt_q; + automatic int vrf_seq_byte = axi_byte - lower_byte - axi_r_byte_pnt_q + vrf_word_byte_pnt_q; // And then shuffle it automatic int vrf_byte = shuffle_index(vrf_seq_byte, NrLanes, vinsn_issue_q.vtype.vsew); // Is this byte a valid byte in the VRF word? - if (vrf_seq_byte < issue_cnt_q && vrf_seq_byte < NrLanes * 8) begin + if (vrf_seq_byte < issue_cnt_bytes_q && vrf_seq_byte < (NrLanes * 8)) begin : is_vrf_byte // At which lane, and what is the byte offset in that lane, of the byte vrf_byte? automatic int vrf_lane = vrf_byte >> 3; automatic int vrf_offset = vrf_byte[2:0]; @@ -279,27 +286,36 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( axi_r_i.data[8*axi_byte +: 8]; result_queue_d[result_queue_write_pnt_q][vrf_lane].be[vrf_offset] = vinsn_issue_q.vm || mask_i[vrf_lane][vrf_offset]; - end - end - end + end : is_vrf_byte + end : is_axi_r_byte + end : axi_r_to_result_queue // Initialize id and addr fields of the result queue requests for (int lane = 0; lane < NrLanes; lane++) begin result_queue_d[result_queue_write_pnt_q][lane].id = vinsn_issue_q.id; - result_queue_d[result_queue_write_pnt_q][lane].addr = vaddr(vinsn_issue_q.vd, NrLanes) + - (((vinsn_issue_q.vl - (issue_cnt_q >> int'(vinsn_issue_q.vtype.vsew))) / NrLanes) >> - (int'(EW64) - int'(vinsn_issue_q.vtype.vsew))); + result_queue_d[result_queue_write_pnt_q][lane].addr = + vaddr(vinsn_issue_q.vd, NrLanes) + // base address of vd + ( + ( + ( + (vinsn_issue_q.vl) - // total number of elements to be processed + (issue_cnt_bytes_q >> unsigned'(vinsn_issue_q.vtype.vsew)) // elements left (issue_cnt_bytes_q is in bytes, so we shift rx by EEW) + ) / NrLanes // elements per lane 
(each lane processes num elements / NrLanes) + ) >> (unsigned'(EW64) - unsigned'(vinsn_issue_q.vtype.vsew)) // 64-bit aligned address + ); // final offset to vd end - end + end : operands_valid // We have a word ready to be sent to the lanes - if (vrf_pnt_d == NrLanes*8 || vrf_pnt_d == issue_cnt_q) begin + if (vrf_word_byte_pnt_d == NrLanes*8 || vrf_word_byte_pnt_d == issue_cnt_bytes_q) begin // Increment result queue pointers and counters result_queue_cnt_d += 1; - if (result_queue_write_pnt_q == ResultQueueDepth-1) + if (result_queue_write_pnt_q == ResultQueueDepth-1) begin : result_queue_write_pnt_overflow result_queue_write_pnt_d = '0; - else + end : result_queue_write_pnt_overflow + else begin : result_queue_write_pnt_increment result_queue_write_pnt_d = result_queue_write_pnt_q + 1; + end : result_queue_write_pnt_increment // Trigger the request signal result_queue_valid_d[result_queue_write_pnt_q] = {NrLanes{1'b1}}; @@ -308,52 +324,56 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( mask_ready_o = !vinsn_issue_q.vm; // Reset the pointer in the VRF word - vrf_pnt_d = '0; + vrf_word_byte_pnt_d = '0; // Account for the results that were issued - issue_cnt_d = issue_cnt_q - NrLanes * 8; - if (issue_cnt_q < NrLanes * 8) - issue_cnt_d = '0; + issue_cnt_bytes_d = issue_cnt_bytes_q - (NrLanes * 8); // TODO: add vstart here (use issue/commit_cnt_bytes_q) + if (issue_cnt_bytes_q < (NrLanes * 8)) begin : issue_cnt_bytes_overflow + issue_cnt_bytes_d = '0; + end : issue_cnt_bytes_overflow end // Consumed all valid bytes in this R beat - if (r_pnt_d == upper_byte - lower_byte + 1 || issue_cnt_d == '0) begin + if ( ( axi_r_byte_pnt_d == ( upper_byte - lower_byte + 1 ) ) || ( issue_cnt_bytes_d == '0 ) ) begin : axi_r_beat_finish // Request another beat axi_r_ready_o = 1'b1; - r_pnt_d = '0; + axi_r_byte_pnt_d = '0; // Account for the beat we consumed - len_d = len_q + 1; - end + axi_len_d = axi_len_q + 1; + end : axi_r_beat_finish // Consumed all beats from this burst 
- if ($unsigned(len_d) == axi_pkg::len_t'($unsigned(axi_addrgen_req_i.len) + 1)) begin + if ($unsigned(axi_len_d) == axi_pkg::len_t'($unsigned(axi_addrgen_req_i.len) + 1)) begin : axi_finish // Reset AXI pointers - len_d = '0; - r_pnt_d = '0; + axi_len_d = '0; + axi_r_byte_pnt_d = '0; // Wait for another AXI request axi_addrgen_req_ready_o = 1'b1; - end + end : axi_finish // Finished issuing results - if (vinsn_issue_valid && issue_cnt_d == '0) begin + if (vinsn_issue_valid && (issue_cnt_bytes_d == '0)) begin : vrf_results_finish // Increment vector instruction queue pointers and counters vinsn_queue_d.issue_cnt -= 1; - if (vinsn_queue_q.issue_pnt == VInsnQueueDepth-1) + if (vinsn_queue_q.issue_pnt == (VInsnQueueDepth-1)) begin : issue_pnt_overflow vinsn_queue_d.issue_pnt = '0; - else + end : issue_pnt_overflow + else begin : issue_pnt_increment vinsn_queue_d.issue_pnt += 1; + end : issue_pnt_increment // Prepare for the next vector instruction - if (vinsn_queue_d.issue_cnt != 0) - issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl << int'(vinsn_queue_q.vinsn[ - vinsn_queue_d.issue_pnt].vtype.vsew); - end + if (vinsn_queue_d.issue_cnt != 0) begin : issue_cnt_bytes_update + issue_cnt_bytes_d = ( vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl + ) << unsigned'(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vtype.vsew); + end : issue_cnt_bytes_update + end : vrf_results_finish end ////////////////////////////////// // Write results into the VRF // ////////////////////////////////// - for (int lane = 0; lane < NrLanes; lane++) begin: result_write + for (int lane = 0; lane < NrLanes; lane++) begin: vrf_result_write ldu_result_req_o[lane] = result_queue_valid_q[result_queue_read_pnt_q][lane]; ldu_result_addr_o[lane] = result_queue_q[result_queue_read_pnt_q][lane].addr; ldu_result_id_o[lane] = result_queue_q[result_queue_read_pnt_q][lane].id; @@ -365,39 +385,43 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( // Received a grant from the VRF. 
// Deactivate the request, but do not bump the pointers for now. - if (ldu_result_req_o[lane] && ldu_result_gnt_i[lane]) begin + if (ldu_result_req_o[lane] && ldu_result_gnt_i[lane]) begin : vrf_grant result_queue_valid_d[result_queue_read_pnt_q][lane] = 1'b0; result_queue_d[result_queue_read_pnt_q][lane] = '0; // Reset the final gnt vector since we are now waiting for another final gnt result_final_gnt_d[lane] = 1'b0; - end - end: result_write + end : vrf_grant + end: vrf_result_write // All lanes accepted the VRF request // Wait for all the final grants, to be sure that all the results were written back if (!(|result_queue_valid_d[result_queue_read_pnt_q]) && - (&result_final_gnt_d || commit_cnt_q > (NrLanes * 8))) + (&result_final_gnt_d || commit_cnt_bytes_q > (NrLanes * 8))) begin // There is something waiting to be written - if (!result_queue_empty) begin + if (!result_queue_empty) begin : result_available // Increment the read pointer - if (result_queue_read_pnt_q == ResultQueueDepth-1) + if (result_queue_read_pnt_q == (ResultQueueDepth-1)) begin : result_queue_read_pnt_overflow result_queue_read_pnt_d = 0; - else + end : result_queue_read_pnt_overflow + else begin : result_queue_read_pnt_increment result_queue_read_pnt_d = result_queue_read_pnt_q + 1; + end : result_queue_read_pnt_increment // Decrement the counter of results waiting to be written result_queue_cnt_d -= 1; // Decrement the counter of remaining vector elements waiting to be written - commit_cnt_d = commit_cnt_q - NrLanes * 8; - if (commit_cnt_q < (NrLanes * 8)) - commit_cnt_d = '0; - end + commit_cnt_bytes_d = commit_cnt_bytes_q - (NrLanes * 8); // TODO: add vstart here (use issue/commit_cnt_bytes_q) + if (commit_cnt_bytes_q < (NrLanes * 8)) begin : commit_cnt_bytes_overflow + commit_cnt_bytes_d = '0; + end : commit_cnt_bytes_overflow + end : result_available + end // Finished committing the results of a vector instruction - if (vinsn_commit_valid && commit_cnt_d == '0) begin + if 
(vinsn_commit_valid && commit_cnt_bytes_d == '0) begin : vinsn_done // Mark the vector instruction as being done - pe_resp.vinsn_done[vinsn_commit.id] = 1'b1; + pe_resp_d.vinsn_done[vinsn_commit.id] = 1'b1; // Signal complete load load_complete_o = 1'b1; @@ -411,9 +435,16 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( // Update the commit counter for the next instruction if (vinsn_queue_d.commit_cnt != '0) - commit_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vl << int'(vinsn_queue_q.vinsn[ - vinsn_queue_d.commit_pnt].vtype.vsew); - end + commit_cnt_bytes_d = ( vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vl + ) << unsigned'(vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vtype.vsew); + end : vinsn_done + + // Ack back exceptions + if ( addrgen_exception_valid_i ) begin : exception + // Signal done to sequencer + pe_resp_d.vinsn_done[vinsn_commit.id] = 1'b1; + // Clear counters and flags + end : exception ////////////////////////////// // Accept new instruction // @@ -425,10 +456,13 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( vinsn_running_d[pe_req_i.id] = 1'b1; // Initialize counters - if (vinsn_queue_d.issue_cnt == '0) - issue_cnt_d = pe_req_i.vl << int'(pe_req_i.vtype.vsew); - if (vinsn_queue_d.commit_cnt == '0) - commit_cnt_d = pe_req_i.vl << int'(pe_req_i.vtype.vsew); + // TODO(bug fix): add masking logic (stores are not idempotent!) 
+ if (vinsn_queue_d.issue_cnt == '0) begin : issue_cnt_bytes_init + issue_cnt_bytes_d = (pe_req_i.vl) << unsigned'(pe_req_i.vtype.vsew); + end : issue_cnt_bytes_init + if (vinsn_queue_d.commit_cnt == '0) begin : commit_cnt_bytes_init + commit_cnt_bytes_d = (pe_req_i.vl) << unsigned'(pe_req_i.vtype.vsew); + end : commit_cnt_bytes_init // Bump pointers and counters of the vector instruction queue vinsn_queue_d.accept_pnt += 1; @@ -440,21 +474,21 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin vinsn_running_q <= '0; - issue_cnt_q <= '0; - commit_cnt_q <= '0; - len_q <= '0; - r_pnt_q <= '0; - vrf_pnt_q <= '0; + issue_cnt_bytes_q <= '0; + commit_cnt_bytes_q <= '0; + axi_len_q <= '0; + axi_r_byte_pnt_q <= '0; + vrf_word_byte_pnt_q <= '0; pe_resp_o <= '0; result_final_gnt_q <= '0; end else begin vinsn_running_q <= vinsn_running_d; - issue_cnt_q <= issue_cnt_d; - commit_cnt_q <= commit_cnt_d; - len_q <= len_d; - r_pnt_q <= r_pnt_d; - vrf_pnt_q <= vrf_pnt_d; - pe_resp_o <= pe_resp; + issue_cnt_bytes_q <= issue_cnt_bytes_d; + commit_cnt_bytes_q <= commit_cnt_bytes_d; + axi_len_q <= axi_len_d; + axi_r_byte_pnt_q <= axi_r_byte_pnt_d; + vrf_word_byte_pnt_q <= vrf_word_byte_pnt_d; + pe_resp_o <= pe_resp_d; result_final_gnt_q <= result_final_gnt_d; end end diff --git a/hardware/src/vlsu/vlsu.sv b/hardware/src/vlsu/vlsu.sv index 7505f9f6f..efd14b3d3 100644 --- a/hardware/src/vlsu/vlsu.sv +++ b/hardware/src/vlsu/vlsu.sv @@ -42,8 +42,8 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( output logic [1:0] pe_req_ready_o, // Load (0) and Store (1) units output pe_resp_t [1:0] pe_resp_o, // Load (0) and Store (1) units output logic addrgen_ack_o, - output logic addrgen_error_o, - output vlen_t addrgen_error_vl_o, + output ariane_pkg::exception_t addrgen_exception_o, + output vlen_t addrgen_exception_vstart_o, // Interface with the lanes // Store unit operands input elen_t [NrLanes-1:0] stu_operand_i, @@ 
-59,6 +59,25 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( input logic [NrLanes-1:0] mask_valid_i, output logic vldu_mask_ready_o, output logic vstu_mask_ready_o, + + // CSR input + input logic en_ld_st_translation_i, + + // Interface with CVA6's sv39 MMU + // This is everything the MMU can provide, it might be overcomplete for Ara and some signals be useless + output ariane_pkg::exception_t mmu_misaligned_ex_o, + output logic mmu_req_o, // request address translation + output logic [riscv::VLEN-1:0] mmu_vaddr_o, // virtual address out + output logic mmu_is_store_o, // the translation is requested by a store + // if we need to walk the page table we can't grant in the same cycle + // Cycle 0 + input logic mmu_dtlb_hit_i, // sent in the same cycle as the request if translation hits in the DTLB + input logic [riscv::PPNW-1:0] mmu_dtlb_ppn_i, // ppn (send same cycle as hit) + // Cycle 1 + input logic mmu_valid_i, // translation is valid + input logic [riscv::PLEN-1:0] mmu_paddr_i, // translated address + input ariane_pkg::exception_t mmu_exception_i, // address translation threw an exception + // Results output logic [NrLanes-1:0] ldu_result_req_o, output vid_t [NrLanes-1:0] ldu_result_id_o, @@ -69,6 +88,11 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( input logic [NrLanes-1:0] ldu_result_final_gnt_i ); + logic load_complete, store_complete; + logic addrgen_exception_load, addrgen_exception_store; + assign load_complete_o = load_complete | addrgen_exception_load; + assign store_complete_o = store_complete | addrgen_exception_store; + /////////////////// // Definitions // /////////////////// @@ -133,8 +157,10 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( .pe_req_valid_i (pe_req_valid_i ), .pe_vinsn_running_i (pe_vinsn_running_i ), .addrgen_ack_o (addrgen_ack_o ), - .addrgen_error_o (addrgen_error_o ), - .addrgen_error_vl_o (addrgen_error_vl_o ), + .addrgen_exception_o ( addrgen_exception_o ), + .addrgen_exception_vstart_o ( 
addrgen_exception_vstart_o ), + .addrgen_exception_load_o ( addrgen_exception_load ), + .addrgen_exception_store_o ( addrgen_exception_store ), // Interface with the lanes .addrgen_operand_i (addrgen_operand_i ), .addrgen_operand_target_fu_i(addrgen_operand_target_fu_i), @@ -144,7 +170,19 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( .axi_addrgen_req_o (axi_addrgen_req ), .axi_addrgen_req_valid_o (axi_addrgen_req_valid ), .ldu_axi_addrgen_req_ready_i(ldu_axi_addrgen_req_ready ), - .stu_axi_addrgen_req_ready_i(stu_axi_addrgen_req_ready ) + .stu_axi_addrgen_req_ready_i(stu_axi_addrgen_req_ready ), + + // CSR input + .en_ld_st_translation_i, + .mmu_misaligned_ex_o, + .mmu_req_o, + .mmu_vaddr_o, + .mmu_is_store_o, + .mmu_dtlb_hit_i, + .mmu_dtlb_ppn_i, + .mmu_valid_i, + .mmu_paddr_i, + .mmu_exception_i ); //////////////////////// @@ -165,7 +203,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( .axi_r_valid_i (axi_resp.r_valid ), .axi_r_ready_o (axi_req.r_ready ), // Interface with the dispatcher - .load_complete_o (load_complete_o ), + .load_complete_o (load_complete ), // Interface with the main sequencer .pe_req_i (pe_req_i ), .pe_req_valid_i (pe_req_valid_i ), @@ -173,6 +211,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( .pe_req_ready_o (pe_req_ready_o[OffsetLoad]), .pe_resp_o (pe_resp_o[OffsetLoad] ), // Interface with the address generator + .addrgen_exception_valid_i ( addrgen_ack_o & addrgen_exception_o.valid ), .axi_addrgen_req_i (axi_addrgen_req ), .axi_addrgen_req_valid_i(axi_addrgen_req_valid ), .axi_addrgen_req_ready_o(ldu_axi_addrgen_req_ready ), @@ -213,7 +252,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( .axi_b_ready_o (axi_req.b_ready ), // Interface with the dispatcher .store_pending_o (store_pending_o ), - .store_complete_o (store_complete_o ), + .store_complete_o (store_complete ), // Interface with the main sequencer .pe_req_i (pe_req_i ), .pe_req_valid_i (pe_req_valid_i ), @@ -221,6 +260,7 @@ module vlsu 
import ara_pkg::*; import rvv_pkg::*; #( .pe_req_ready_o (pe_req_ready_o[OffsetStore]), .pe_resp_o (pe_resp_o[OffsetStore] ), // Interface with the address generator + .addrgen_exception_valid_i ( addrgen_ack_o & addrgen_exception_o.valid ), .axi_addrgen_req_i (axi_addrgen_req ), .axi_addrgen_req_valid_i(axi_addrgen_req_valid ), .axi_addrgen_req_ready_o(stu_axi_addrgen_req_ready ), diff --git a/hardware/src/vlsu/vstu.sv b/hardware/src/vlsu/vstu.sv index 9580f59b0..6d92c03a7 100644 --- a/hardware/src/vlsu/vstu.sv +++ b/hardware/src/vlsu/vstu.sv @@ -47,6 +47,7 @@ module vstu import ara_pkg::*; import rvv_pkg::*; #( // Interface with the address generator input addrgen_axi_req_t axi_addrgen_req_i, input logic axi_addrgen_req_valid_i, + input logic addrgen_exception_valid_i, output logic axi_addrgen_req_ready_o, // Interface with the lanes input elen_t [NrLanes-1:0] stu_operand_i,