From 720802096aebef57e9c80319d162fcaeed52ca49 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Wed, 24 Jul 2024 10:53:41 +0800 Subject: [PATCH] [rockett1] draft Tile - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.t1rocketv.T1RocketTile config --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.t1rocketv.T1RocketTile design --parameter ./T1RocketTile.json --run-firtool --- elaborator/src/rocketv/Rocket.scala | 6 +- elaborator/src/t1rocketv/T1RocketTile.scala | 102 ++++ rocketv/src/Bundle.scala | 43 +- rocketv/src/RocketCore.scala | 142 ++--- rocketv/src/RocketTile.scala | 3 +- t1rocketv/src/T1RocketTile.scala | 543 ++++++++++++++++++++ 6 files changed, 775 insertions(+), 64 deletions(-) create mode 100644 elaborator/src/t1rocketv/T1RocketTile.scala create mode 100644 t1rocketv/src/T1RocketTile.scala diff --git a/elaborator/src/rocketv/Rocket.scala b/elaborator/src/rocketv/Rocket.scala index 99f90d17ad..f5954500e0 100644 --- a/elaborator/src/rocketv/Rocket.scala +++ b/elaborator/src/rocketv/Rocket.scala @@ -34,7 +34,8 @@ object Rocket extends Elaborator { @arg(name = "fastLoadByte") fastLoadByte: Boolean, @arg(name = "fastLoadWord") fastLoadWord: Boolean, @arg(name = "dcacheNSets") dcacheNSets: Int, - @arg(name = "flushOnFenceI") flushOnFenceI: Boolean) { + @arg(name = "flushOnFenceI") flushOnFenceI: Boolean, + @arg(name = "usingT1") usingT1: Boolean) { def convert: RocketParameter = RocketParameter( useAsyncReset, clockGate, @@ -61,7 +62,8 @@ object Rocket extends Elaborator { fastLoadByte, fastLoadWord, dcacheNSets, - flushOnFenceI + 
flushOnFenceI, + usingT1 ) } diff --git a/elaborator/src/t1rocketv/T1RocketTile.scala b/elaborator/src/t1rocketv/T1RocketTile.scala new file mode 100644 index 0000000000..3cb8398e25 --- /dev/null +++ b/elaborator/src/t1rocketv/T1RocketTile.scala @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.t1rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.t1.elaborator.Elaborator +import org.chipsalliance.t1.rtl.vrf.RamType +import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw} +import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter} + +// --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_c --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w +object T1RocketTile extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + implicit object RamTypeRead extends TokensReader.Simple[RamType] { + def shortName = "ramtype" + def read(strs: Seq[String]) = { + 
Right( + strs.head match { + case "p0rw" => p0rw + case "p0rp1w" => p0rp1w + case "p0rwp1rw" => p0rwp1rw + } + ) + } + } + + @main + case class T1RocketTileParameterMain( + @arg(name = "instructionSets") instructionSets: Seq[String], + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "cacheable") cacheable: BitSet, + @arg(name = "sideEffects") sideEffects: BitSet, + @arg(name = "dcacheNSets") dcacheNSets: Int, + @arg(name = "dcacheNWays") dcacheNWays: Int, + @arg(name = "dcacheRowBits") dcacheRowBits: Int, + @arg(name = "iCacheNSets") iCacheNSets: Int, + @arg(name = "iCacheNWays") iCacheNWays: Int, + @arg(name = "iCachePrefetch") iCachePrefetch: Boolean, + @arg(name = "dLen") dLen: Int, + @arg(name = "vrfBankSize") vrfBankSize: Int, + @arg(name = "vrfRamType") vrfRamType: RamType + ) { + def convert: T1RocketTileParameter = T1RocketTileParameter( + instructionSets: Seq[String], + cacheBlockBytes: Int, + nPMPs: Int, + cacheable: BitSet, + sideEffects: BitSet, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + dLen: Int, + vrfBankSize: Int, + vrfRamType: RamType + ) + } + + implicit def T1RocketTileParameterMainParser: ParserForClass[T1RocketTileParameterMain] = + ParserForClass[T1RocketTileParameterMain] + + @main + def config(@arg(name = "parameter") parameter: T1RocketTileParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[T1RocketTile, T1RocketTileParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index 5847719eba..f7b8161c26 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -5,7 +5,7 @@ package org.chipsalliance.rocketv import chisel3._ -import 
chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, isPow2, log2Ceil} +import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, ValidIO, isPow2, log2Ceil} // This file defines Bundle shared in the project. // all Bundle only have datatype without any helper or functions, while they only exist in the companion Bundle. @@ -1398,3 +1398,44 @@ class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entr val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits) val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits) } + +// Interface between T1 <> Rocket integration +class RocketCoreToT1(xLen: Int, vlWidth: Int) extends Bundle { + val issue: DecoupledIO[T1Issue] = Decoupled(new T1Issue(xLen, vlWidth)) + val retire: T1Retire = Flipped(new T1Retire(xLen)) +} + +class T1Issue(xLen: Int, vlWidth: Int) extends Bundle { + val instruction: UInt = UInt(32.W) + val rs1Data: UInt = UInt(xLen.W) + val rs2Data: UInt = UInt(xLen.W) + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + val vstart: UInt = UInt(32.W) + val vcsr: UInt = UInt(32.W) +} + +object T1Issue { + def vlmul(issue: T1Issue): UInt = issue.vtype(2, 0) + def vsew(issue: T1Issue): UInt = issue.vtype(5, 3) + def vta(issue: T1Issue): Bool = issue.vtype(6) + def vma(issue: T1Issue): Bool = issue.vtype(7) + def vxrm(issue: T1Issue): UInt = issue.vcsr(2, 1) +} + +class T1RdRetire(xLen: Int) extends Bundle { + val rdAddress: UInt = UInt(5.W) + val rdData: UInt = UInt(xLen.W) + val isFp: Bool = Bool() +} + +class T1CSRRetire extends Bundle { + val vxsat: UInt = UInt(32.W) + val fflag: UInt = UInt(32.W) +} + +class T1Retire(xLen: Int) extends Bundle { + val rd: Valid[T1RdRetire] = Valid(new T1RdRetire(xLen)) + val csr: Valid[T1CSRRetire] = Valid(new T1CSRRetire) + val mem: Valid[Bundle] = Valid(new Bundle {}) +} \ No newline at end of file diff --git a/rocketv/src/RocketCore.scala b/rocketv/src/RocketCore.scala index 
4e974f4d70..7470b4c3ec 100644 --- a/rocketv/src/RocketCore.scala +++ b/rocketv/src/RocketCore.scala @@ -10,7 +10,7 @@ import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.probe.{Probe, ProbeValue, define} import chisel3.util.circt.ClockGate import chisel3.util.experimental.decode.DecodeBundle -import chisel3.util.{BitPat, Cat, Fill, MuxLookup, PriorityEncoder, PriorityMux, RegEnable, log2Ceil} +import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up} import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes import org.chipsalliance.rvdecoderdb.Instruction @@ -51,9 +51,12 @@ case class RocketParameter( fastLoadByte: Boolean, fastLoadWord: Boolean, dcacheNSets: Int, - flushOnFenceI: Boolean + flushOnFenceI: Boolean, + usingT1: Boolean ) extends SerializableModuleParameter { + // interface to T1 + def usingVector = hasInstructionSet("rv_v") // fixed for now def usingRVE = false @@ -143,8 +146,6 @@ case class RocketParameter( // static to false for now def usingNMI = hasInstructionSet("rv_smrnmi") - def usingVector = hasInstructionSet("rv_v") - // calculated parameter def fetchWidth: Int = if (usingCompressed) 2 else 1 @@ -319,6 +320,7 @@ class RocketInterface(parameter: RocketParameter) extends Bundle { ) ) val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen))) + val t1 = Option.when(parameter.usingT1)(new RocketCoreToT1(parameter.xLen, parameter.vLen)) val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch)) val cease = Output(Bool()) val wfi = Output(Bool()) @@ -1342,62 +1344,82 @@ class Rocket(val parameter: RocketParameter) fpu.keep_clock_enabled := false.B } -// @todo get back t1. -// t1Request.foreach { t1 => -// // Send instruction to T1 when write back. 
-// t1.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector) -// t1.bits.instruction := wbRegInstruction -// t1.bits.rs1Data := wbRegWdata -// t1.bits.rs2Data := wbRegRS2 -// val response: DecoupledIO[VectorResponse] = t1Response.get -// // TODO: make it configurable -// val maxCount: Int = 32 -// val countWidth = log2Up(maxCount) -// def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { -// val counter: UInt = RegInit(0.U(size.W)) -// val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt) -// val updateCounter = grant ^ release -// when(updateCounter) { -// counter := nextCount -// } -// flush.foreach(f => when(f)(counter := 0.U)) -// val empty = (updateCounter && nextCount === 0.U) || counter === 0.U -// val fullCounter: Int = (1 << size) - 1 - margin -// val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U -// (empty, full) -// } -// // Maintain lsu counter -// val lsuGrant: Bool = t1.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU) -// val lsuRelease: Bool = response.fire && response.bits.mem -// val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease) -// // Maintain vector counter -// // There may be 4 instructions in the pipe -// val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.valid, t1IssueQueueRelease.get) -// vectorLSUEmpty.foreach(_ := lsuEmpty) -// vectorQueueFull.foreach(_ := vectorFull) -// } -// // todo: vector change csr -// t1Response.foreach { vectorResponse => -// val vectorTryToWriteRd = vectorResponse.bits.rd.valid && !vectorResponse.bits.float -// val vectorTryToWriteFP = vectorResponse.bits.rd.valid && vectorResponse.bits.float -// vectorResponse.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && -// (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP) -// when(vectorResponse.fire && 
vectorTryToWriteRd) { -// longlatencyWdata := vectorResponse.bits.data -// longlatencyWaddress := vectorResponse.bits.rd.bits -// longLatencyWenable := true.B -// } -// fpu.foreach { fpu => -// when(!(dmemResponseValid && dmemResponseFpu)) { -// fpu.dmem_resp_val := vectorResponse.fire && vectorTryToWriteFP -// fpu.dmem_resp_data := vectorResponse.bits.data -// // todo: 32 bit only -// fpu.dmem_resp_type := 2.U -// // todo: connect tag -// fpu.dmem_resp_tag := 0.U -// } -// } -// } + // TODO: T1 only logic + io.t1.foreach { t1 => + // T1 Issue + val maxCount: Int = 32 + val t1IssueQueue = Module(new Queue(chiselTypeOf(t1.issue.bits), maxCount)) + t1IssueQueue.io.enq.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector) + t1IssueQueue.io.enq.bits.instruction := wbRegInstruction + t1IssueQueue.io.enq.bits.rs1Data := wbRegWdata + t1IssueQueue.io.enq.bits.rs2Data := wbRegRS2 + t1.issue.valid := t1IssueQueue.io.deq.valid + t1.issue.bits := t1IssueQueue.io.deq.bits + t1IssueQueue.io.deq.ready := t1.issue.ready + // For each different retirements, it should maintain different scoreboard + val t1CSRRetireQueue: Queue[T1CSRRetire] = Module(new Queue(chiselTypeOf(t1.retire.csr.bits), maxCount)) + val t1XRDRetireQueue: Queue[T1RdRetire] = Module(new Queue(chiselTypeOf(t1.retire.rd.bits), maxCount)) + + val countWidth = log2Up(maxCount) + def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { + val counter: UInt = RegInit(0.U(size.W)) + val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt) + val updateCounter = grant ^ release + when(updateCounter) { + counter := nextCount + } + flush.foreach(f => when(f)(counter := 0.U)) + val empty = (updateCounter && nextCount === 0.U) || counter === 0.U + val fullCounter: Int = (1 << size) - 1 - margin + val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U + (empty, full) + } + // T1 Memory 
Scoreboard + val t1MemoryGrant: Bool = t1IssueQueue.io.enq.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU) + val t1MemoryRelease: Bool = t1.retire.mem.fire + // todo: handle vector lsu in pipe + // +1: There are instructions that will enter t1 + val (lsuEmpty, _) = counterManagement(countWidth + 1)(t1MemoryGrant, t1MemoryRelease) + // T1 CSR Scoreboard + // todo: add wbRegDecodeOutput(vectorWriteCsr) + val t1CSRGrant: Bool = false.B + val t1CSRRelease: Bool = false.B // t1CSRRetireQueue.io.deq.fire + val (t1CSREmpty, _) = counterManagement(countWidth + 1)(t1CSRGrant, t1CSRRelease) + // T1 XRD Scoreboard? + + // Maintain vector counter + // There may be 4 instructions in the pipe + val (_, vectorFull) = counterManagement(countWidth, 4)(t1IssueQueue.io.enq.valid, t1.issue.fire) + vectorLSUEmpty.foreach(_ := lsuEmpty) + vectorQueueFull.foreach(_ := vectorFull) + + t1XRDRetireQueue.io.enq.valid := t1.retire.rd.valid + t1XRDRetireQueue.io.enq.bits := t1.retire.rd.bits + t1CSRRetireQueue.io.enq.valid := t1.retire.csr.valid + t1CSRRetireQueue.io.enq.bits := t1.retire.csr.bits + // todo: write csr here + t1CSRRetireQueue.io.deq.ready := true.B + + val vectorTryToWriteRd = t1XRDRetireQueue.io.deq.valid && !t1XRDRetireQueue.io.deq.bits.isFp + val vectorTryToWriteFP = t1XRDRetireQueue.io.deq.valid && t1XRDRetireQueue.io.deq.bits.isFp + t1XRDRetireQueue.io.deq.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP) + + when(t1XRDRetireQueue.io.deq.fire && vectorTryToWriteRd) { // write back the entry being dequeued, not the raw T1 retire port + longlatencyWdata := t1XRDRetireQueue.io.deq.bits.rdData + longlatencyWaddress := t1XRDRetireQueue.io.deq.bits.rdAddress + longLatencyWenable := true.B + } + io.fpu.foreach { fpu => + when(!(dmemResponseValid && dmemResponseFpu)) { + fpu.dmem_resp_val := t1XRDRetireQueue.io.deq.fire && vectorTryToWriteFP + fpu.dmem_resp_data := t1XRDRetireQueue.io.deq.bits.rdData + // todo: 32 bit only + fpu.dmem_resp_type := 2.U + // todo: connect tag + 
fpu.dmem_resp_tag := 0.U + } + } + } io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem) val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B)) diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala index 9f3fe7e6b5..33c2082d78 100644 --- a/rocketv/src/RocketTile.scala +++ b/rocketv/src/RocketTile.scala @@ -244,7 +244,8 @@ case class RocketTileParameter( fastLoadByte, fastLoadWord, dcacheNSets, - flushOnFenceI + flushOnFenceI, + usingT1 = false ) def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter( diff --git a/t1rocketv/src/T1RocketTile.scala b/t1rocketv/src/T1RocketTile.scala new file mode 100644 index 0000000000..452f760841 --- /dev/null +++ b/t1rocketv/src/T1RocketTile.scala @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.tile + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate} +import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter} +import chisel3.util.experimental.BitSet +import chisel3.util.log2Ceil +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} +import org.chipsalliance.rocketv.{BHTParameter, FPU, FPUParameter, Frontend, FrontendParameter, HellaCache, HellaCacheArbiter, HellaCacheArbiterParameter, HellaCacheParameter, PTW, PTWParameter, Rocket, RocketParameter, RocketTileParameter} +import org.chipsalliance.rvdecoderdb.Instruction +import org.chipsalliance.t1.rtl.decoder.T1CustomInstruction +import org.chipsalliance.t1.rtl.vrf.RamType +import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw} +import org.chipsalliance.t1.rtl.{LaneAdder, LaneAdderParam, LaneDiv, LaneDivFP, LaneDivFPParam, LaneDivParam, LaneFloat, LaneFloatParam, LaneMul, LaneMulParam, LaneShifter, LaneShifterParameter, 
LogicParam, MaskedLogic, OtherUnit, OtherUnitParam, T1, T1Parameter, VFUInstantiateParameter} + +object T1RocketTileParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit val vrfRamTypeP: upickle.default.ReadWriter[RamType] = upickle.default.ReadWriter.merge( + upickle.default.macroRW[p0rw.type], + upickle.default.macroRW[p0rp1w.type], + upickle.default.macroRW[p0rwp1rw.type] + ) + + implicit def rwP: upickle.default.ReadWriter[T1RocketTileParameter] = upickle.default.macroRW[T1RocketTileParameter] +} + +case class T1RocketTileParameter( + instructionSets: Seq[String], + cacheBlockBytes: Int, + nPMPs: Int, + cacheable: BitSet, + sideEffects: BitSet, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + dLen: Int, + vrfBankSize: Int, + vrfRamType: RamType) + extends SerializableModuleParameter { + require(instructionSets.count(Seq("Zve32x", "Zve32f").contains) == 1, "at least support one Zve32x or Zve32f") + + val useAsyncReset: Boolean = false + val clockGate: Boolean = false + + val paddrBits: Int = xLen + // TODO: add S in the future + val priv: String = "m" + val hartIdLen: Int = 1 + val useBPWatch: Boolean = false + val mcontextWidth: Int = 0 + val scontextWidth: Int = 0 + val asidBits: Int = 0 + val resetVectorBits: Int = paddrBits + val nBreakpoints: Int = 0 + // TODO: set to 0 + val dtlbNSets: Int = 1 + val dtlbNWays: Int = 32 + val itlbNSets: Int = 1 + val itlbNWays: Int = 32 + val itlbNSectors: Int = 4 + val itlbNSuperpageEntries: Int = 4 + val nPTECacheEntries: Int = 9 + val nL2TLBWays: Int = 1 + val nL2TLBEntries: Int = 0 + // T1 doesn't check exception. 
+ val legal: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) // BigInt shift: an Int `1 << paddrBits` wraps to 1 once paddrBits >= 32 + val read: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) + val write: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) + val putPartial: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) + val logic: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) + val arithmetic: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) + val exec: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) + val btbEntries: Int = 28 + val btbNMatchBits: Int = 14 + val btbUpdatesOutOfOrder: Boolean = false + val nPages: Int = 6 + val nRAS: Int = 6 + val bhtParameter: Option[BHTParameter] = Some(BHTParameter(nEntries = 512, counterLength = 1, historyLength = 8, historyBits = 3)) + // TODO: remove it + val mulDivLatency: Int = 0 + val divUnroll: Int = 1 + val divEarlyOut: Boolean = false + val divEarlyOutGranularity: Int = 1 + val mulUnroll: Int = 1 + val mulEarlyOut: Boolean = false + val sfmaLatency: Int = 3 + val dfmaLatency: Int = 4 + val divSqrt: Boolean = true + // TODO: check decoder + val flushOnFenceI: Boolean = true + val fastLoadByte: Boolean = false + val fastLoadWord: Boolean = true + val maxUncachedInFlight: Int = 1 + val separateUncachedResp: Boolean = false + + + // calculate + def usingUser: Boolean = priv.contains("u") + + def usingSupervisor: Boolean = priv.contains("s") + + def vLen: Int = instructionSets.collectFirst { + case s"zvl${vlen}b" => vlen.toInt + }.get + + // static for now + def hasBeu: Boolean = false + + def usingNMI: Boolean = false + + def usingHypervisor: Boolean = false + + def usingDataScratchpad: Boolean = false + + def nLocalInterrupts: Int = 0 + + def dcacheArbPorts: Int = 2 + + def tagECC: Option[String] = None + + def dataECC: Option[String] = None + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def instructions: Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // 
Four mandatory instruction sets. + Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) + + private def hasInstructionSet(setName: String): Boolean = + instructions.flatMap(_.instructionSets.map(_.name)).contains(setName) + + def usingBTB: Boolean = btbEntries > 0 + + def xLen: Int = + (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match { + case (true, true) => throw new Exception("cannot support both rv32 and rv64 together") + case (true, false) => 32 + case (false, true) => 64 + case (false, false) => throw new Exception("no basic instruction found.") + } + + def fLen: Option[Int] = + ( + hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"), + hasInstructionSet("rv_d") || hasInstructionSet("rv64_d") + ) match { + case (false, false) => None + case (true, false) => Some(32) + case (false, true) => Some(64) + case (true, true) => Some(64) + } + + def usingVM = hasInstructionSet("sfence.vma") + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a") + + def usingCompressed = hasInstructionSet("rv_c") + + def minFLen: Option[Int] = + if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh")) + Some(16) + else + fLen + + def rocketParameter: RocketParameter = RocketParameter( + useAsyncReset, + clockGate, + instructionSets.toSet, + vLen, + usingUser, + hartIdLen, + nPMPs, + asidBits, + nBreakpoints, + usingBTB, + useBPWatch, + mcontextWidth, + scontextWidth, + mulDivLatency, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut, + paddrBits, + cacheBlockBytes, + hasBeu, + 
fastLoadByte, + fastLoadWord, + dcacheNSets, + flushOnFenceI, + usingT1 = true + ) + + def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + usingVM: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNWays: Int, + dcacheNSets: Int, + dcacheRowBits: Int, + dtlbNSets: Int, + dtlbNWays: Int, + tagECC: Option[String], + dataECC: Option[String], + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) + + def hellaCacheArbiterParameter: HellaCacheArbiterParameter = HellaCacheArbiterParameter( + useAsyncReset: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNSets: Int, + usingVM: Boolean, + separateUncachedResp: Boolean + ) + + def ptwParameter: PTWParameter = PTWParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + asidBits: Int, + pgLevels: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + nPMPs: Int + ) + + def frontendParameter: FrontendParameter = FrontendParameter( + useAsyncReset = useAsyncReset: Boolean, + clockGate = clockGate: Boolean, + xLen = xLen: Int, + usingAtomics = usingAtomics: Boolean, + usingDataScratchpad = usingDataScratchpad: Boolean, + usingVM = usingVM: Boolean, + usingCompressed = usingCompressed: Boolean, + usingBTB = usingBTB: Boolean, + itlbNSets = itlbNSets: Int, + itlbNWays = itlbNWays: Int, + itlbNSectors = itlbNSectors: Int, + itlbNSuperpageEntries = itlbNSuperpageEntries: Int, + blockBytes = cacheBlockBytes: Int, + iCacheNSets = iCacheNSets: Int, + iCacheNWays = iCacheNWays: Int, + iCachePrefetch = iCachePrefetch: Boolean, + btbEntries = btbEntries: Int, + btbNMatchBits = 
btbNMatchBits: Int, + btbUpdatesOutOfOrder = btbUpdatesOutOfOrder: Boolean, + nPages = nPages: Int, + nRAS = nRAS: Int, + nPMPs = nPMPs: Int, + paddrBits = paddrBits: Int, + pgLevels = pgLevels: Int, + asidBits = asidBits: Int, + bhtParameter = bhtParameter: Option[BHTParameter], + legal = legal: BitSet, + cacheable = cacheable: BitSet, + read = read: BitSet, + write = write: BitSet, + putPartial = putPartial: BitSet, + logic = logic: BitSet, + arithmetic = arithmetic: BitSet, + exec = exec: BitSet, + sideEffects = sideEffects: BitSet + ) + + def fpuParameter: Option[FPUParameter] = fLen.zip(minFLen).map { + case (fLen, minFLen) => + FPUParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + hartIdLen: Int + ) + } + + val vfuInstantiateParameter = if (instructionSets.contains("Zve32f")) + VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq(), + divfpModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, 
log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))) + ) else + VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneDiv], LaneDivParam(32, 1)), Seq(0, 1, 2, 3)) + ), + divfpModuleParameters = Seq(), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = Seq() + ) + + def t1Parameter: T1Parameter = T1Parameter( + vLen = vLen, + dLen = dLen, + extensions = instructionSets.filter(Seq("Zve32x", "Zve32f").contains), + // empty for now. 
+ t1customInstructions = Seq(), + vrfBankSize = vrfBankSize, + vrfRamType = vrfRamType, + vfuInstantiateParameter = vfuInstantiateParameter + ) + + def instructionFetchParameter: AXI4BundleParameter = frontendParameter.instructionFetchParameter + + def itimParameter: Option[AXI4BundleParameter] = frontendParameter.itimParameter + + def loadStoreParameter: AXI4BundleParameter = hellaCacheParameter.loadStoreParameter + + def dtimParameter: Option[AXI4BundleParameter] = hellaCacheParameter.dtimParameter + + def t1HighBandwidthParameter: AXI4BundleParameter = t1Parameter.axi4BundleParameter + + def t1HightOutstandingParameter: AXI4BundleParameter = t1Parameter.axi4BundleParameter.copy(dataWidth = 32) +} + +class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + // todo: Const + val hartid = Flipped(UInt(parameter.hartIdLen.W)) + val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) + + val debug: Bool = Input(Bool()) + val mtip: Bool = Input(Bool()) + val msip: Bool = Input(Bool()) + val meip: Bool = Input(Bool()) + val seip: Option[Bool] = Option.when(parameter.usingSupervisor)(Bool()) + val lip: Vec[Bool] = Vec(parameter.nLocalInterrupts, Bool()) + val nmi = Option.when(parameter.usingNMI)(Bool()) + val nmiInterruptVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + val nmiIxceptionVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + // TODO: buserror should be handled by NMI + val buserror: Bool = Input(Bool()) + val wfi: Bool = Output(Bool()) + val halt: Bool = Output(Bool()) + + val instructionFetchAXI: AXI4ROIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + val itimAXI: Option[AXI4RWIrrevocable] = + parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val loadStoreAXI: 
AXI4RWIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) + val dtimAXI: Option[AXI4RWIrrevocable] = + parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val highBandwidthAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HighBandwidthParameter) + val highOutstandingAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HightOutstandingParameter) +} + +class T1RocketTile(val parameter: T1RocketTileParameter) + extends FixedIORawModule(new T1RocketTileInterface(parameter)) + with SerializableModule[T1RocketTileParameter] { + val rocket: Instance[Rocket] = Instantiate(new Rocket(parameter.rocketParameter)) + val frontend: Instance[Frontend] = Instantiate(new Frontend(parameter.frontendParameter)) + val hellaCache: Instance[HellaCache] = Instantiate(new HellaCache(parameter.hellaCacheParameter)) + val hellaCacheArbiter: Instance[HellaCacheArbiter] = Instantiate( + new HellaCacheArbiter(parameter.hellaCacheArbiterParameter) + ) + val ptw: Instance[PTW] = Instantiate(new PTW(parameter.ptwParameter)) + val fpu: Option[Instance[FPU]] = parameter.fpuParameter.map(fpuParameter => Instantiate(new FPU(fpuParameter))) + val t1: Instance[T1] = Instantiate(new T1(parameter.t1Parameter)) + + rocket.io.clock := io.clock + rocket.io.reset := io.reset + rocket.io.hartid := io.hartid + rocket.io.interrupts.debug := io.debug + rocket.io.interrupts.mtip := io.mtip + rocket.io.interrupts.msip := io.msip + rocket.io.interrupts.meip := io.meip + rocket.io.interrupts.seip.foreach(_ := io.seip.get) + rocket.io.interrupts.lip := io.lip + rocket.io.interrupts.nmi.foreach { nmi => + nmi.rnmi := io.nmi.get + nmi.rnmi_interrupt_vector := io.nmiInterruptVector.get + nmi.rnmi_exception_vector := io.nmiIxceptionVector.get + } + // @todo make it optional + rocket.io.buserror := io.buserror + io.wfi := rocket.io.wfi + 
io.loadStoreAXI <> hellaCache.io.loadStoreAXI + io.dtimAXI.zip(hellaCache.io.dtimAXI).foreach { case (io, hellaCache) => io <> hellaCache } + io.instructionFetchAXI <> frontend.io.instructionFetchAXI + io.itimAXI.zip(frontend.io.itimAXI).foreach { case (io, frontend) => io <> frontend } + // design for halt and beu, only use the halt function for now. + io.halt := Seq(frontend.io.nonDiplomatic.errors.uncorrectable, hellaCache.io.errors.uncorrectable) + .flatMap(_.map(_.valid)) + .foldLeft(false.B)(_ || _) + + // rocket core io + rocket.io.imem <> frontend.io.nonDiplomatic.cpu + hellaCacheArbiter.io.requestor(0) <> rocket.io.dmem + rocket.io.ptw <> ptw.io.dpath + rocket.io.fpu.zip(fpu.map(_.io.core)).foreach { case (core, fpu) => core <> fpu } + // match connect + t1.io.issue <> rocket.io.t1.get.issue + rocket.io.t1.get.retire <> t1.io.retire + // used by trace module + rocket.io.bpwatch := DontCare + // don't use for now, this is design for report the custom cease status. + // rocket.io.cease + // it will be used in the future w/ trace support. + rocket.io.traceStall := false.B + + // frontend io + frontend.io.clock := io.clock + frontend.io.reset := io.reset + frontend.io.resetVector := io.resetVector + ptw.io.requestor(0) <> frontend.io.nonDiplomatic.ptw + + // hellacache io + hellaCache.io.clock := io.clock + hellaCache.io.reset := io.reset + ptw.io.requestor(1) <> hellaCache.io.ptw + hellaCache.io.cpu <> hellaCacheArbiter.io.mem + + // ptw io + ptw.io.clock := io.clock + ptw.io.reset := io.reset + hellaCacheArbiter.io.requestor(1) <> ptw.io.mem + + // hellacache arbiter io + hellaCacheArbiter.io.clock := io.clock + hellaCacheArbiter.io.reset := io.reset + + fpu.foreach { fpu => + fpu.io.clock := io.clock + fpu.io.reset := io.reset + // @todo: remove it from FPU. 
+ fpu.io.cp_req <> DontCare + fpu.io.cp_resp <> DontCare + } + t1.io.clock := io.clock + t1.io.reset := io.reset + io.highBandwidthAXI <> t1.io.highBandwidthLoadStorePort + io.highOutstandingAXI <> t1.io.indexedLoadStorePort +}