Skip to content

Commit

Permalink
Supporting vstart CSR for operand read, VALU, VLSU
Browse files Browse the repository at this point in the history
* vstart support for vector unit-stride loads and stores

* vstart support for vector strided loads and stores

* vstart support for valu operations, mask operations not tested

* Preliminary work on vstart support for vector indexed loads and stores

* Minor fixes

* Refactoring

* Explanatory comments
  • Loading branch information
MaistoV committed Oct 13, 2023
1 parent 018af66 commit f08c28f
Show file tree
Hide file tree
Showing 10 changed files with 637 additions and 447 deletions.
15 changes: 12 additions & 3 deletions hardware/include/ara_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -974,11 +974,20 @@ package ara_pkg;
} opqueue_e;

// Each lane has eight VRF banks
// NOTE: values != 8 are not supported
localparam int unsigned NrVRFBanksPerLane = 8;

// Find the starting address of a vector register vid
// Find the starting address (in bytes) of a vector register chunk of vid
function automatic logic [63:0] vaddr(logic [4:0] vid, int NrLanes);
vaddr = vid * (VLENB / NrLanes / 8);
// Each vector register spans multiple words in each bank in each lane.
// The start address is the same in every lane.
// Therefore, within each lane, each vector register chunk starts at a given offset.
vaddr = vid * (VLENB / NrLanes / NrVRFBanksPerLane);
// NOTE: the only extensively tested configuration of Ara keeps:
// - (VLEN / NrLanes) constant at 1024;
// - NrVRFBanksPerLane always equal to 8.
// Given that, each vector register will span 2 words across all the banks and lanes,
// and therefore vaddr = vid * 16.
endfunction: vaddr

// Differentiate between SLDU and ADDRGEN operands from opqueue
Expand Down Expand Up @@ -1016,7 +1025,7 @@ package ara_pkg;

typedef struct packed {
rvv_pkg::vew_e eew; // Effective element width
vlen_t vl; // Vector length
vlen_t elem_count; // Vector body length
opqueue_conversion_e conv; // Type conversion
logic [1:0] ntr_red; // Neutral type for reductions
logic is_reduct; // Is this a reduction?
Expand Down
4 changes: 2 additions & 2 deletions hardware/src/ara.sv
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ module ara import ara_pkg::*; #(

// Interface with CVA6's sv39 MMU
// This is everything the MMU can provide; it might be overcomplete for Ara, and some signals may be useless
output exception_t mmu_misaligned_ex_o,
output ariane_pkg::exception_t mmu_misaligned_ex_o,
output logic mmu_req_o, // request address translation
output logic [riscv::VLEN-1:0] mmu_vaddr_o, // virtual address out
output logic mmu_is_store_o, // the translation is requested by a store
Expand All @@ -54,7 +54,7 @@ module ara import ara_pkg::*; #(
// Cycle 1
input logic mmu_valid_i, // translation is valid
input logic [riscv::PLEN-1:0] mmu_paddr_i, // translated address
input exception_t mmu_exception_i, // address translation threw an exception
input ariane_pkg::exception_t mmu_exception_i, // address translation threw an exception

// Interface with Ariane
input accelerator_req_t acc_req_i,
Expand Down
19 changes: 13 additions & 6 deletions hardware/src/ara_dispatcher.sv
Original file line number Diff line number Diff line change
Expand Up @@ -2662,15 +2662,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
end

// Wait until the back-end answers to acknowledge those instructions
if (ara_resp_valid_i) begin
if ( ara_resp_valid_i ) begin : ara_resp_valid
acc_resp_o.req_ready = 1'b1;
acc_resp_o.exception = ara_resp_i.exception;
acc_resp_o.resp_valid = 1'b1;
ara_req_valid_d = 1'b0;
// In case of error, modify vstart
if (ara_resp_i.exception.valid)
if ( ara_resp_i.exception.valid ) begin : exception
csr_vstart_d = ara_resp_i.exception_vl;
end
end : exception
end : ara_resp_valid
end : OpcodeLoadFp

/////////////////////
Expand Down Expand Up @@ -2859,15 +2860,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
end

// Wait until the back-end answers to acknowledge those instructions
if (ara_resp_valid_i) begin
if (ara_resp_valid_i) begin : ara_resp_valid
acc_resp_o.req_ready = 1'b1;
acc_resp_o.exception = ara_resp_i.exception;
acc_resp_o.resp_valid = 1'b1;
ara_req_valid_d = 1'b0;
// If there is an error, change vstart
if (ara_resp_i.exception.valid)
if ( ara_resp_i.exception.valid ) begin : exception
csr_vstart_d = ara_resp_i.exception_vl;
end
end : exception
end : ara_resp_valid
end : OpcodeStoreFp

////////////////////////////
Expand All @@ -2879,6 +2881,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
// Therefore, Ara must be idle before performing any CSR operation.

// Stall if there is any pending vector instruction
// NOTE: This is overconstraining. Not all CSR ops actually need to stall if a vector instruction is pending.
// E.g., CSR vl is never updated by instructions past ara_dispatcher, except for "unit-stride fault-only-first loads". Reading vl would be safe otherwise.
// E.g., CSR vlenb is a design-constant parameter; reading it is always safe.
// E.g., CSRs vxrm and vxsat have no influence on non-fixed-point instructions; they could be read and written safely when no fixed-point operation is running.
// By analyzing the spec more closely, more such optimizations can be made. For the sake of simplicity, the current implementation treats CSR ops as one block.
if ( ara_idle_i ) begin : ara_idle
// These always respond at the same cycle
acc_resp_o.resp_valid = 1'b1;
Expand Down
Loading

0 comments on commit f08c28f

Please sign in to comment.