Skip to content

Commit

Permalink
[rockett1] draft Tile
Browse files Browse the repository at this point in the history
- generate parameter json:  mill elaborator.runMain org.chipsalliance.t1.elaborator.t1rocketv.T1RocketTile config --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w
- generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.t1rocketv.T1RocketTile design --parameter ./T1RocketTile.json --run-firtool
  • Loading branch information
sequencer committed Jul 31, 2024
1 parent 19c48c5 commit 7208020
Show file tree
Hide file tree
Showing 6 changed files with 775 additions and 64 deletions.
6 changes: 4 additions & 2 deletions elaborator/src/rocketv/Rocket.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ object Rocket extends Elaborator {
@arg(name = "fastLoadByte") fastLoadByte: Boolean,
@arg(name = "fastLoadWord") fastLoadWord: Boolean,
@arg(name = "dcacheNSets") dcacheNSets: Int,
@arg(name = "flushOnFenceI") flushOnFenceI: Boolean) {
@arg(name = "flushOnFenceI") flushOnFenceI: Boolean,
@arg(name = "usingT1") usingT1: Boolean) {
def convert: RocketParameter = RocketParameter(
useAsyncReset,
clockGate,
Expand All @@ -61,7 +62,8 @@ object Rocket extends Elaborator {
fastLoadByte,
fastLoadWord,
dcacheNSets,
flushOnFenceI
flushOnFenceI,
usingT1
)
}

Expand Down
102 changes: 102 additions & 0 deletions elaborator/src/t1rocketv/T1RocketTile.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: 2024 Jiuyang Liu <[email protected]>
package org.chipsalliance.t1.elaborator.t1rocketv

import chisel3.util.BitPat
import chisel3.util.experimental.BitSet
import mainargs._
import org.chipsalliance.t1.elaborator.Elaborator
import org.chipsalliance.t1.rtl.vrf.RamType
import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw}
import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter}

// --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_c --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w
/** Command-line elaborator for the T1RocketTile generator.
  *
  * Two mainargs sub-commands are exposed:
  *   - `config`: builds a `T1RocketTileParameter` from command-line flags and hands it to `configImpl`.
  *   - `design`: hands a parameter file path (and the `run-firtool` flag) to `designImpl`.
  */
object T1RocketTile extends Elaborator {

  /** mainargs reader that turns one command-line token into a `BitSet`.
    *
    * The token is a comma-separated list. Each entry is one of:
    *   - `from-to`: hexadecimal bounds, `to` inclusive;
    *   - `from+length`: hexadecimal base and length;
    *   - anything else: used verbatim as the digits of a binary `BitPat` (`b<entry>`).
    *
    * For both range forms the bit width is four times the number of hex digits written for `from`.
    * The resulting sets are unioned into a single `BitSet`.
    */
  implicit object BitSetRead extends TokensReader.Simple[BitSet] {
    def shortName = "bitset"

    /** Converts a single list entry.
      *
      * Malformed ranges throw: `IllegalArgumentException` from `require`, or `NumberFormatException`
      * from the hexadecimal `BigInt` parse.
      */
    private def parseEntry(entry: String): BitSet =
      if (entry.contains("-")) {
        val bounds = entry.split("-")
        require(bounds.size == 2, s"expected <from>-<to> (hex) but got '$entry'")
        val from = BigInt(bounds.head, 16)
        // `to` is inclusive on the command line, hence the +1 before computing the length.
        val exclusiveEnd = BigInt(bounds.last, 16) + 1
        BitSet.fromRange(from, exclusiveEnd - from, bounds.head.length * 4)
      } else if (entry.contains("+")) {
        val parts = entry.split("\\+")
        require(parts.size == 2, s"expected <from>+<length> (hex) but got '$entry'")
        val from = BigInt(parts.head, 16)
        val length = BigInt(parts.last, 16)
        BitSet.fromRange(from, length, parts.head.length * 4)
      } else {
        BitPat(s"b$entry")
      }

    /** @param strs tokens supplied for the argument; only the first one is read
      * @return the union of all entries, or `Left` with a message when the token holds no entry
      */
    def read(strs: Seq[String]): Either[String, BitSet] =
      strs.head
        .split(",")
        .map(parseEntry)
        // A token such as "," splits into zero entries; report that instead of throwing from reduce.
        .reduceOption(_.union(_))
        .toRight(s"no bitset entry found in '${strs.head}'")
  }

  /** mainargs reader mapping the textual RAM type name onto the matching `RamType` value. */
  implicit object RamTypeRead extends TokensReader.Simple[RamType] {
    def shortName = "ramtype"

    /** @param strs tokens supplied for the argument; only the first one is read
      * @return the selected `RamType`, or `Left` with a message naming the accepted values
      */
    def read(strs: Seq[String]): Either[String, RamType] =
      strs.head match {
        case "p0rw"     => Right(p0rw)
        case "p0rp1w"   => Right(p0rp1w)
        case "p0rwp1rw" => Right(p0rwp1rw)
        // Previously an unknown name escaped as a scala.MatchError.
        case other      => Left(s"unknown RAM type '$other', expected one of: p0rw, p0rp1w, p0rwp1rw")
      }
  }

  /** Command-line view of `T1RocketTileParameter`; every field is a flag of the same name. */
  @main
  case class T1RocketTileParameterMain(
    @arg(name = "instructionSets") instructionSets: Seq[String],
    @arg(name = "cacheBlockBytes") cacheBlockBytes: Int,
    @arg(name = "nPMPs") nPMPs:                     Int,
    @arg(name = "cacheable") cacheable:             BitSet,
    @arg(name = "sideEffects") sideEffects:         BitSet,
    @arg(name = "dcacheNSets") dcacheNSets:         Int,
    @arg(name = "dcacheNWays") dcacheNWays:         Int,
    @arg(name = "dcacheRowBits") dcacheRowBits:     Int,
    @arg(name = "iCacheNSets") iCacheNSets:         Int,
    @arg(name = "iCacheNWays") iCacheNWays:         Int,
    @arg(name = "iCachePrefetch") iCachePrefetch:   Boolean,
    @arg(name = "dLen") dLen:                       Int,
    @arg(name = "vrfBankSize") vrfBankSize:         Int,
    @arg(name = "vrfRamType") vrfRamType:           RamType) {

    /** Forwards the parsed flags, positionally and in declaration order, to `T1RocketTileParameter`. */
    def convert: T1RocketTileParameter = T1RocketTileParameter(
      instructionSets,
      cacheBlockBytes,
      nPMPs,
      cacheable,
      sideEffects,
      dcacheNSets,
      dcacheNWays,
      dcacheRowBits,
      iCacheNSets,
      iCacheNWays,
      iCachePrefetch,
      dLen,
      vrfBankSize,
      vrfRamType
    )
  }

  implicit def T1RocketTileParameterMainParser: ParserForClass[T1RocketTileParameterMain] =
    ParserForClass[T1RocketTileParameterMain]

  /** `config` sub-command: converts the parsed flags and passes the parameter to `configImpl`. */
  @main
  def config(@arg(name = "parameter") parameter: T1RocketTileParameterMain) = configImpl(parameter.convert)

  /** `design` sub-command: passes the parameter file path and the firtool flag to `designImpl`. */
  @main
  def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) =
    designImpl[T1RocketTile, T1RocketTileParameter](parameter, runFirtool.value)

  def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args)
}
43 changes: 42 additions & 1 deletion rocketv/src/Bundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
package org.chipsalliance.rocketv

import chisel3._
import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, isPow2, log2Ceil}
import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, ValidIO, isPow2, log2Ceil}

// This file defines the Bundles shared across the project.
// Each Bundle declares only data fields, with no helpers or functions; those exist only in the Bundle's companion object.
Expand Down Expand Up @@ -1398,3 +1398,44 @@ class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entr
val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits)
val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits)
}

/** Interface between the Rocket core and T1.
  *
  * `issue` is a Decoupled (ready/valid) channel carrying [[T1Issue]]; `retire` is the flipped
  * [[T1Retire]] bundle, i.e. it travels in the opposite direction to `issue`.
  *
  * @param xLen    width of the scalar register data fields
  * @param vlWidth forwarded to [[T1Issue]]
  */
class RocketCoreToT1(xLen: Int, vlWidth: Int) extends Bundle {
// Instructions handed over to T1, with back-pressure through `ready`.
val issue: DecoupledIO[T1Issue] = Decoupled(new T1Issue(xLen, vlWidth))
// Retirement information coming back; flipped relative to `issue`.
val retire: T1Retire = Flipped(new T1Retire(xLen))
}

/** Payload of one instruction issued to T1.
  *
  * @param xLen    width of `rs1Data` and `rs2Data`
  * @param vlWidth NOTE(review): currently unused in this class; `vl` is declared 32 bits wide — confirm intent
  */
class T1Issue(xLen: Int, vlWidth: Int) extends Bundle {
// 32-bit instruction word.
val instruction: UInt = UInt(32.W)
// Scalar source operand values, xLen bits each.
val rs1Data: UInt = UInt(xLen.W)
val rs2Data: UInt = UInt(xLen.W)
// Vector state passed along with the instruction, each 32 bits wide.
// Sub-fields of `vtype` and `vcsr` are extracted by the helpers in the companion object.
val vtype: UInt = UInt(32.W)
val vl: UInt = UInt(32.W)
val vstart: UInt = UInt(32.W)
val vcsr: UInt = UInt(32.W)
}

/** Bit-field accessors for [[T1Issue]].
  *
  * NOTE(review): the bit positions look like the RISC-V vector `vtype`/`vcsr` layouts — confirm against the spec.
  */
object T1Issue {
// vtype[2:0]
def vlmul(issue: T1Issue): UInt = issue.vtype(2, 0)
// vtype[5:3]
def vsew(issue: T1Issue): UInt = issue.vtype(5, 3)
// vtype[6]
def vta(issue: T1Issue): Bool = issue.vtype(6)
// vtype[7]
def vma(issue: T1Issue): Bool = issue.vtype(7)
// vcsr[2:1]
def vxrm(issue: T1Issue): UInt = issue.vcsr(2, 1)
}

/** Destination-register write-back reported by T1 on retirement.
  *
  * @param xLen width of `rdData`
  */
class T1RdRetire(xLen: Int) extends Bundle {
// 5-bit destination register index.
val rdAddress: UInt = UInt(5.W)
// Value to write, xLen bits.
val rdData: UInt = UInt(xLen.W)
// Distinguishes floating-point from integer write-back; the Rocket core steers the result on this bit.
val isFp: Bool = Bool()
}

/** CSR update reported by T1 on retirement; both fields are 32 bits wide.
  *
  * NOTE(review): presumably the fixed-point saturation flag and the floating-point exception flags — confirm.
  */
class T1CSRRetire extends Bundle {
val vxsat: UInt = UInt(32.W)
val fflag: UInt = UInt(32.W)
}

/** Retirement channels from T1, each a `Valid` (no back-pressure) bundle.
  *
  * @param xLen forwarded to [[T1RdRetire]]
  */
class T1Retire(xLen: Int) extends Bundle {
// Scalar/FP destination register write-back.
val rd: Valid[T1RdRetire] = Valid(new T1RdRetire(xLen))
// CSR update.
val csr: Valid[T1CSRRetire] = Valid(new T1CSRRetire)
// Memory retirement event; the payload is an empty Bundle, so only `valid` carries information.
val mem: Valid[Bundle] = Valid(new Bundle {})
}
142 changes: 82 additions & 60 deletions rocketv/src/RocketCore.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
import chisel3.probe.{Probe, ProbeValue, define}
import chisel3.util.circt.ClockGate
import chisel3.util.experimental.decode.DecodeBundle
import chisel3.util.{BitPat, Cat, Fill, MuxLookup, PriorityEncoder, PriorityMux, RegEnable, log2Ceil}
import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up}
import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes
import org.chipsalliance.rvdecoderdb.Instruction

Expand Down Expand Up @@ -51,9 +51,12 @@ case class RocketParameter(
fastLoadByte: Boolean,
fastLoadWord: Boolean,
dcacheNSets: Int,
flushOnFenceI: Boolean
flushOnFenceI: Boolean,
usingT1: Boolean
)
extends SerializableModuleParameter {
// interface to T1
def usingVector = hasInstructionSet("rv_v")

// fixed for now
def usingRVE = false
Expand Down Expand Up @@ -143,8 +146,6 @@ case class RocketParameter(
// static to false for now
def usingNMI = hasInstructionSet("rv_smrnmi")

def usingVector = hasInstructionSet("rv_v")

// calculated parameter
def fetchWidth: Int = if (usingCompressed) 2 else 1

Expand Down Expand Up @@ -319,6 +320,7 @@ class RocketInterface(parameter: RocketParameter) extends Bundle {
)
)
val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen)))
val t1 = Option.when(parameter.usingT1)(new RocketCoreToT1(parameter.xLen, parameter.vLen))
val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch))
val cease = Output(Bool())
val wfi = Output(Bool())
Expand Down Expand Up @@ -1342,62 +1344,82 @@ class Rocket(val parameter: RocketParameter)
fpu.keep_clock_enabled := false.B
}

// @todo get back t1.
// t1Request.foreach { t1 =>
// // Send instruction to T1 when write back.
// t1.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
// t1.bits.instruction := wbRegInstruction
// t1.bits.rs1Data := wbRegWdata
// t1.bits.rs2Data := wbRegRS2
// val response: DecoupledIO[VectorResponse] = t1Response.get
// // TODO: make it configurable
// val maxCount: Int = 32
// val countWidth = log2Up(maxCount)
// def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = {
// val counter: UInt = RegInit(0.U(size.W))
// val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt)
// val updateCounter = grant ^ release
// when(updateCounter) {
// counter := nextCount
// }
// flush.foreach(f => when(f)(counter := 0.U))
// val empty = (updateCounter && nextCount === 0.U) || counter === 0.U
// val fullCounter: Int = (1 << size) - 1 - margin
// val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U
// (empty, full)
// }
// // Maintain lsu counter
// val lsuGrant: Bool = t1.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
// val lsuRelease: Bool = response.fire && response.bits.mem
// val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease)
// // Maintain vector counter
// // There may be 4 instructions in the pipe
// val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.valid, t1IssueQueueRelease.get)
// vectorLSUEmpty.foreach(_ := lsuEmpty)
// vectorQueueFull.foreach(_ := vectorFull)
// }
// // todo: vector change csr
// t1Response.foreach { vectorResponse =>
// val vectorTryToWriteRd = vectorResponse.bits.rd.valid && !vectorResponse.bits.float
// val vectorTryToWriteFP = vectorResponse.bits.rd.valid && vectorResponse.bits.float
// vectorResponse.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) &&
// (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)
// when(vectorResponse.fire && vectorTryToWriteRd) {
// longlatencyWdata := vectorResponse.bits.data
// longlatencyWaddress := vectorResponse.bits.rd.bits
// longLatencyWenable := true.B
// }
// fpu.foreach { fpu =>
// when(!(dmemResponseValid && dmemResponseFpu)) {
// fpu.dmem_resp_val := vectorResponse.fire && vectorTryToWriteFP
// fpu.dmem_resp_data := vectorResponse.bits.data
// // todo: 32 bit only
// fpu.dmem_resp_type := 2.U
// // todo: connect tag
// fpu.dmem_resp_tag := 0.U
// }
// }
// }
// TODO: T1 only logic
// Glue between the write-back stage and the optional T1 interface: buffers issued vector
// instructions, tracks outstanding work with counters, and arbitrates T1 retirements against
// the scalar write-back and FPU response paths.
io.t1.foreach { t1 =>
  // T1 Issue
  val maxCount: Int = 32
  // Vector instructions reaching write-back are buffered here before being offered to T1.
  val t1IssueQueue = Module(new Queue(chiselTypeOf(t1.issue.bits), maxCount))
  t1IssueQueue.io.enq.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
  t1IssueQueue.io.enq.bits.instruction := wbRegInstruction
  t1IssueQueue.io.enq.bits.rs1Data := wbRegWdata
  t1IssueQueue.io.enq.bits.rs2Data := wbRegRS2
  // NOTE(review): vtype, vl, vstart and vcsr of the enqueued T1Issue are not driven in this
  // block — confirm they are connected elsewhere before elaboration.
  t1.issue.valid := t1IssueQueue.io.deq.valid
  t1.issue.bits := t1IssueQueue.io.deq.bits
  t1IssueQueue.io.deq.ready := t1.issue.ready
  // Each kind of retirement gets its own queue so it can be tracked by its own scoreboard.
  val t1CSRRetireQueue: Queue[T1CSRRetire] = Module(new Queue(chiselTypeOf(t1.retire.csr.bits), maxCount))
  val t1XRDRetireQueue: Queue[T1RdRetire] = Module(new Queue(chiselTypeOf(t1.retire.rd.bits), maxCount))

  val countWidth = log2Up(maxCount)

  // Up/down counter of `size` bits: +1 on grant, -1 on release, unchanged when both or neither
  // happen in the same cycle. Returns (empty, full) computed from the current and next value;
  // `full` asserts `margin` entries before the counter saturates.
  def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = {
    val counter: UInt = RegInit(0.U(size.W))
    val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt)
    val updateCounter = grant ^ release
    when(updateCounter) {
      counter := nextCount
    }
    flush.foreach(f => when(f)(counter := 0.U))
    val empty = (updateCounter && nextCount === 0.U) || counter === 0.U
    val fullCounter: Int = (1 << size) - 1 - margin
    val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U
    (empty, full)
  }
  // T1 Memory Scoreboard
  val t1MemoryGrant: Bool = t1IssueQueue.io.enq.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
  val t1MemoryRelease: Bool = t1.retire.mem.fire
  // todo: handle vector lsu in pipe
  // +1: There are instructions that will enter t1
  val (lsuEmpty, _) = counterManagement(countWidth + 1)(t1MemoryGrant, t1MemoryRelease)
  // T1 CSR Scoreboard
  // todo: add wbRegDecodeOutput(vectorWriteCsr)
  val t1CSRGrant: Bool = false.B
  val t1CSRRelease: Bool = false.B // t1CSRRetireQueue.io.deq.fire
  // The counter is still instantiated; its outputs are not consumed yet.
  counterManagement(countWidth + 1)(t1CSRGrant, t1CSRRelease)
  // T1 XRD Scoreboard?

  // Maintain vector counter
  // There may be 4 instructions in the pipe
  val (_, vectorFull) = counterManagement(countWidth, 4)(t1IssueQueue.io.enq.valid, t1.issue.fire)
  vectorLSUEmpty.foreach(_ := lsuEmpty)
  vectorQueueFull.foreach(_ := vectorFull)

  t1XRDRetireQueue.io.enq.valid := t1.retire.rd.valid
  t1XRDRetireQueue.io.enq.bits := t1.retire.rd.bits
  t1CSRRetireQueue.io.enq.valid := t1.retire.csr.valid
  t1CSRRetireQueue.io.enq.bits := t1.retire.csr.bits
  // todo: write csr here
  t1CSRRetireQueue.io.deq.ready := true.B

  // Head of the rd-retire queue: this entry, not the raw T1 port, is what gets arbitrated.
  val t1XRDRetire = t1XRDRetireQueue.io.deq
  val vectorTryToWriteRd = t1XRDRetire.valid && !t1XRDRetire.bits.isFp
  val vectorTryToWriteFP = t1XRDRetire.valid && t1XRDRetire.bits.isFp
  // Integer results yield to the scalar write-back and replayed integer dmem responses;
  // FP results yield to replayed FP dmem responses.
  t1XRDRetire.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)

  // Write back only when the queued entry is actually dequeued, and take the data from that
  // entry. Using t1.retire.rd here would write data that has not been arbitrated yet and drop
  // the entry that is really leaving the queue.
  when(t1XRDRetire.fire && vectorTryToWriteRd) {
    longlatencyWdata := t1XRDRetire.bits.rdData
    longlatencyWaddress := t1XRDRetire.bits.rdAddress
    longLatencyWenable := true.B
  }
  io.fpu.foreach { fpu =>
    when(!(dmemResponseValid && dmemResponseFpu)) {
      // Same dequeue event as above, steered to the FPU when the entry targets an FP register.
      fpu.dmem_resp_val := t1XRDRetire.fire && vectorTryToWriteFP
      fpu.dmem_resp_data := t1XRDRetire.bits.rdData
      // todo: 32 bit only
      fpu.dmem_resp_type := 2.U
      // todo: connect tag
      fpu.dmem_resp_tag := 0.U
    }
  }
}

io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem)
val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B))
Expand Down
3 changes: 2 additions & 1 deletion rocketv/src/RocketTile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ case class RocketTileParameter(
fastLoadByte,
fastLoadWord,
dcacheNSets,
flushOnFenceI
flushOnFenceI,
usingT1 = false
)

def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter(
Expand Down
Loading

0 comments on commit 7208020

Please sign in to comment.